relay_server/statsd.rs
use relay_statsd::{CounterMetric, DistributionMetric, GaugeMetric, TimerMetric};
#[cfg(doc)]
use relay_system::RuntimeMetrics;

/// Gauge metrics used by Relay
pub enum RelayGauges {
    /// Tracks the number of futures waiting to be executed in the pool's queue.
    ///
    /// Useful for understanding the backlog of work and identifying potential bottlenecks.
    ///
    /// This metric is tagged with:
    /// - `pool`: the name of the pool.
    AsyncPoolQueueSize,
    /// Tracks the utilization of the async pool.
    ///
    /// The utilization is a value between 0.0 and 100.0 that indicates how busy the pool is
    /// with CPU-bound work.
    ///
    /// This metric is tagged with:
    /// - `pool`: the name of the pool.
    AsyncPoolUtilization,
    /// Tracks the activity of the async pool.
    ///
    /// The activity is a value between 0.0 and 100.0 that indicates how busy the pool is
    /// with respect to its provisioned capacity.
    ///
    /// This metric is tagged with:
    /// - `pool`: the name of the pool.
    AsyncPoolActivity,
    /// The state of Relay with respect to the upstream connection.
    /// Possible values are `0` for normal operations and `1` for a network outage.
    NetworkOutage,
    /// Number of elements in the envelope buffer across all the stacks.
    ///
    /// This metric is tagged with:
    /// - `storage_type`: The type of storage used in the envelope buffer.
    BufferEnvelopesCount,
    /// The number of individual stacks in the priority queue.
    ///
    /// Per combination of `(own_key, sampling_key)`, a new stack is created.
    BufferStackCount,
    /// The disk space used by the buffer.
    BufferDiskUsed,
    /// The memory currently used by the entire system.
    ///
    /// Relay uses the same value for its memory health check.
    SystemMemoryUsed,
    /// The total system memory.
    ///
    /// Relay uses the same value for its memory health check.
    SystemMemoryTotal,
    /// The number of connections currently being managed by the Redis pool.
    #[cfg(feature = "processing")]
    RedisPoolConnections,
    /// The number of idle connections in the Redis pool.
    #[cfg(feature = "processing")]
    RedisPoolIdleConnections,
    /// The maximum number of connections in the Redis pool.
    #[cfg(feature = "processing")]
    RedisPoolMaxConnections,
    /// The number of futures waiting to grab a connection.
    #[cfg(feature = "processing")]
    RedisPoolWaitingForConnection,
    /// The number of notifications in the broadcast channel of the project cache.
    ProjectCacheNotificationChannel,
    /// The number of scheduled and in-progress fetches in the project cache.
    ProjectCacheScheduledFetches,
    /// Exposes the number of connections currently open and handled by the server.
    ServerActiveConnections,
    /// Maximum delay of a metric bucket in seconds.
    ///
    /// The maximum is measured from initial creation of the bucket in an internal Relay
    /// until it is produced to Kafka.
    ///
    /// This metric is tagged with:
    /// - `namespace`: the metric namespace.
    #[cfg(feature = "processing")]
    MetricDelayMax,
    /// Estimated percentage [0-100] of how busy Relay's internal services are.
    ///
    /// This metric is tagged with:
    /// - `service`: the service name.
    /// - `instance_id`: an identifier of the running service, unique per service name.
    ServiceUtilization,
    /// Number of attachment uploads currently in flight.
    #[cfg(feature = "processing")]
    ConcurrentAttachmentUploads,
}

impl GaugeMetric for RelayGauges {
    fn name(&self) -> &'static str {
        match self {
            Self::AsyncPoolQueueSize => "async_pool.queue_size",
            Self::AsyncPoolUtilization => "async_pool.utilization",
            Self::AsyncPoolActivity => "async_pool.activity",
            Self::NetworkOutage => "upstream.network_outage",
            Self::BufferEnvelopesCount => "buffer.envelopes_count",
            Self::BufferStackCount => "buffer.stack_count",
            Self::BufferDiskUsed => "buffer.disk_used",
            Self::SystemMemoryUsed => "health.system_memory.used",
            Self::SystemMemoryTotal => "health.system_memory.total",
            #[cfg(feature = "processing")]
            Self::RedisPoolConnections => "redis.pool.connections",
            #[cfg(feature = "processing")]
            Self::RedisPoolIdleConnections => "redis.pool.idle_connections",
            #[cfg(feature = "processing")]
            Self::RedisPoolMaxConnections => "redis.pool.max_connections",
            #[cfg(feature = "processing")]
            Self::RedisPoolWaitingForConnection => "redis.pool.waiting_for_connection",
            Self::ProjectCacheNotificationChannel => "project_cache.notification_channel.size",
            Self::ProjectCacheScheduledFetches => "project_cache.fetches.size",
            Self::ServerActiveConnections => "server.http.connections",
            #[cfg(feature = "processing")]
            Self::MetricDelayMax => "metrics.delay.max",
            Self::ServiceUtilization => "service.utilization",
            #[cfg(feature = "processing")]
            Self::ConcurrentAttachmentUploads => "attachment.upload.concurrent",
        }
    }
}
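
// Example (a sketch, not part of the original file): gauges defined above are reported
// through `relay_statsd`'s `metric!` macro. The function, its arguments, and the `pool`
// tag value below are illustrative assumptions.
#[allow(dead_code)]
fn example_report_async_pool_queue_size(pool_name: &str, queue_len: u64) {
    relay_statsd::metric!(
        gauge(RelayGauges::AsyncPoolQueueSize) = queue_len,
        pool = pool_name
    );
}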

/// Gauge metrics collected from the Runtime.
pub enum RuntimeGauges {
    /// Exposes [`RuntimeMetrics::num_idle_threads`].
    NumIdleThreads,
    /// Exposes [`RuntimeMetrics::num_alive_tasks`].
    NumAliveTasks,
    /// Exposes [`RuntimeMetrics::blocking_queue_depth`].
    BlockingQueueDepth,
    /// Exposes [`RuntimeMetrics::num_blocking_threads`].
    NumBlockingThreads,
    /// Exposes [`RuntimeMetrics::num_idle_blocking_threads`].
    NumIdleBlockingThreads,
    /// Exposes [`RuntimeMetrics::num_workers`].
    NumWorkers,
    /// Exposes [`RuntimeMetrics::worker_local_queue_depth`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerLocalQueueDepth,
    /// Exposes [`RuntimeMetrics::worker_mean_poll_time`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerMeanPollTime,
}

impl GaugeMetric for RuntimeGauges {
    fn name(&self) -> &'static str {
        match self {
            RuntimeGauges::NumIdleThreads => "runtime.idle_threads",
            RuntimeGauges::NumAliveTasks => "runtime.alive_tasks",
            RuntimeGauges::BlockingQueueDepth => "runtime.blocking_queue_depth",
            RuntimeGauges::NumBlockingThreads => "runtime.num_blocking_threads",
            RuntimeGauges::NumIdleBlockingThreads => "runtime.num_idle_blocking_threads",
            RuntimeGauges::NumWorkers => "runtime.num_workers",
            RuntimeGauges::WorkerLocalQueueDepth => "runtime.worker_local_queue_depth",
            RuntimeGauges::WorkerMeanPollTime => "runtime.worker_mean_poll_time",
        }
    }
}

/// Counter metrics collected from the Runtime.
pub enum RuntimeCounters {
    /// Exposes [`RuntimeMetrics::budget_forced_yield_count`].
    BudgetForcedYieldCount,
    /// Exposes [`RuntimeMetrics::worker_local_schedule_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerLocalScheduleCount,
    /// Exposes [`RuntimeMetrics::worker_noop_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerNoopCount,
    /// Exposes [`RuntimeMetrics::worker_overflow_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerOverflowCount,
    /// Exposes [`RuntimeMetrics::worker_park_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerParkCount,
    /// Exposes [`RuntimeMetrics::worker_poll_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerPollCount,
    /// Exposes [`RuntimeMetrics::worker_steal_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerStealCount,
    /// Exposes [`RuntimeMetrics::worker_steal_operations`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerStealOperations,
    /// Exposes [`RuntimeMetrics::worker_total_busy_duration`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerTotalBusyDuration,
}

impl CounterMetric for RuntimeCounters {
    fn name(&self) -> &'static str {
        match self {
            RuntimeCounters::BudgetForcedYieldCount => "runtime.budget_forced_yield_count",
            RuntimeCounters::WorkerLocalScheduleCount => "runtime.worker_local_schedule_count",
            RuntimeCounters::WorkerNoopCount => "runtime.worker_noop_count",
            RuntimeCounters::WorkerOverflowCount => "runtime.worker_overflow_count",
            RuntimeCounters::WorkerParkCount => "runtime.worker_park_count",
            RuntimeCounters::WorkerPollCount => "runtime.worker_poll_count",
            RuntimeCounters::WorkerStealCount => "runtime.worker_steal_count",
            RuntimeCounters::WorkerStealOperations => "runtime.worker_steal_operations",
            RuntimeCounters::WorkerTotalBusyDuration => "runtime.worker_total_busy_duration",
        }
    }
}

/// Distribution metrics used by Relay.
pub enum RelayDistributions {
    /// The number of bytes received by Relay for each individual envelope item type.
    ///
    /// This metric is tagged with:
    /// - `item_type`: The type of the items being counted.
    /// - `is_container`: Whether this item is a container holding multiple items.
    EnvelopeItemSize,
    /// The amount of bytes in the item payloads of an envelope pushed to the envelope buffer.
    ///
    /// This is not quite the same as the actual size of a serialized envelope, because it ignores
    /// the envelope header and item headers.
    BufferEnvelopeBodySize,
    /// Size of a serialized envelope pushed to the envelope buffer.
    BufferEnvelopeSize,
    /// Size of a compressed envelope pushed to the envelope buffer.
    BufferEnvelopeSizeCompressed,
    /// The number of batches emitted per partition.
    BatchesPerPartition,
    /// The number of buckets in an emitted batch.
    ///
    /// This corresponds to the number of buckets that will end up in an envelope.
    BucketsPerBatch,
    /// The number of spans per processed transaction event.
    ///
    /// This metric is tagged with:
    /// - `platform`: The event's platform, such as `"javascript"`.
    /// - `sdk`: The name of the Sentry SDK sending the transaction. This tag is only set for
    ///   Sentry's SDKs and defaults to "proprietary".
    EventSpans,
    /// Number of projects in the in-memory project cache that are waiting for their state to be
    /// updated.
    ///
    /// See `project_cache.size` for more description of the project cache.
    ProjectStatePending,
    /// Number of project states **requested** from the upstream for each batch request.
    ///
    /// If multiple batches are updated concurrently, this metric is reported multiple times.
    ///
    /// The batch size can be configured with `cache.batch_size`. See `project_cache.size` for more
    /// description of the project cache.
    ProjectStateRequestBatchSize,
    /// Number of project states **returned** from the upstream for each batch request.
    ///
    /// If multiple batches are updated concurrently, this metric is reported multiple times.
    ///
    /// See `project_cache.size` for more description of the project cache.
    ProjectStateReceived,
    /// Number of attempts required to fetch the config for a given project key.
    ProjectStateAttempts,
    /// Number of project states currently held in the in-memory project cache.
    ///
    /// The cache duration for project states can be configured with the following options:
    ///
    /// - `cache.project_expiry`: The time after which a project state counts as expired. It is
    ///   automatically refreshed if a request references the project after it has expired.
    /// - `cache.project_grace_period`: The time after expiry at which the project state will still
    ///   be used to ingest events. Once the grace period expires, the cache is evicted and new
    ///   requests wait for an update.
    ///
    /// There is no limit to the number of cached projects.
    ProjectStateCacheSize,
    /// The size of the compressed project config in the redis cache, in bytes.
    #[cfg(feature = "processing")]
    ProjectStateSizeBytesCompressed,
    /// The size of the uncompressed project config in the redis cache, in bytes.
    #[cfg(feature = "processing")]
    ProjectStateSizeBytesDecompressed,
    /// The number of upstream requests queued up for sending.
    ///
    /// Relay employs connection keep-alive whenever possible. Connections are kept open for _15_
    /// seconds of inactivity or _75_ seconds of activity. If all connections are busy, they are
    /// queued, which is reflected in this metric.
    ///
    /// This metric is tagged with:
    /// - `priority`: The queueing priority of the request, either `"high"` or `"low"`. The
    ///   priority determines precedence in executing requests.
    ///
    /// The number of concurrent connections can be configured with:
    /// - `limits.max_concurrent_requests` for the overall number of connections
    /// - `limits.max_concurrent_queries` for the number of concurrent high-priority requests
    UpstreamMessageQueueSize,
    /// Counts the number of retries for each upstream http request.
    ///
    /// This metric is tagged with:
    ///
    /// - `result`: What happened to the request, an enumeration with the following values:
    ///   * `success`: The request was sent and returned a success code `HTTP 2xx`
    ///   * `response_error`: The request was sent and it returned an HTTP error.
    ///   * `payload_failed`: The request was sent but there was an error in interpreting the response.
    ///   * `send_failed`: Failed to send the request due to a network error.
    ///   * `rate_limited`: The request was rate limited.
    ///   * `invalid_json`: The response could not be parsed back into JSON.
    /// - `route`: The endpoint that was called on the upstream.
    /// - `status-code`: The status code of the request when available, otherwise "-".
    UpstreamRetries,
    /// Size of queries (project config queries, i.e. the request payload, not the response) sent
    /// by Relay over HTTP in bytes.
    UpstreamQueryBodySize,
    /// Size of envelopes sent over HTTP in bytes.
    UpstreamEnvelopeBodySize,
    /// Size of batched global metrics requests sent by Relay over HTTP in bytes.
    UpstreamMetricsBodySize,
    /// Distribution of flush buckets over partition keys.
    ///
    /// The distribution of buckets should be even.
    /// If it is not, this metric should expose it.
    PartitionKeys,
    /// Measures how many splits were performed when sending out a partition.
    PartitionSplits,
    /// Canonical size of a Trace Item.
    ///
    /// This is not the size in bytes; it is computed with the same algorithm used for the logs
    /// billing category.
    ///
    /// This metric is tagged with:
    /// - `item`: the trace item type.
    /// - `too_large`: `true` or `false`, whether the item is bigger than the allowed size limit.
    TraceItemCanonicalSize,
}

impl DistributionMetric for RelayDistributions {
    fn name(&self) -> &'static str {
        match self {
            Self::EnvelopeItemSize => "event.item_size",
            Self::EventSpans => "event.spans",
            Self::BatchesPerPartition => "metrics.buckets.batches_per_partition",
            Self::BucketsPerBatch => "metrics.buckets.per_batch",
            Self::BufferEnvelopeBodySize => "buffer.envelope_body_size",
            Self::BufferEnvelopeSize => "buffer.envelope_size",
            Self::BufferEnvelopeSizeCompressed => "buffer.envelope_size.compressed",
            Self::ProjectStatePending => "project_state.pending",
            Self::ProjectStateAttempts => "project_state.attempts",
            Self::ProjectStateRequestBatchSize => "project_state.request.batch_size",
            Self::ProjectStateReceived => "project_state.received",
            Self::ProjectStateCacheSize => "project_cache.size",
            #[cfg(feature = "processing")]
            Self::ProjectStateSizeBytesCompressed => "project_state.size_bytes.compressed",
            #[cfg(feature = "processing")]
            Self::ProjectStateSizeBytesDecompressed => "project_state.size_bytes.decompressed",
            Self::UpstreamMessageQueueSize => "http_queue.size",
            Self::UpstreamRetries => "upstream.retries",
            Self::UpstreamQueryBodySize => "upstream.query.body_size",
            Self::UpstreamEnvelopeBodySize => "upstream.envelope.body_size",
            Self::UpstreamMetricsBodySize => "upstream.metrics.body_size",
            Self::PartitionKeys => "metrics.buckets.partition_keys",
            Self::PartitionSplits => "partition_splits",
            Self::TraceItemCanonicalSize => "trace_item.canonical_size",
        }
    }
}

/// Timer metrics used by Relay
pub enum RelayTimers {
    /// Time in milliseconds spent deserializing an event from JSON bytes into the native data
    /// structure on which Relay operates.
    EventProcessingDeserialize,
    /// Time in milliseconds spent running normalization on an event. Normalization
    /// happens before envelope filtering and metric extraction.
    EventProcessingNormalization,
    /// Time in milliseconds spent running inbound data filters on an event.
    EventProcessingFiltering,
    /// Time in milliseconds spent checking for organization, project, and DSN rate limits.
    ///
    /// Not all events reach this point. After an event is rate limited for the first time, the rate
    /// limit is cached. Events coming in after this will be discarded earlier in the request queue
    /// and do not reach the processing queue.
    ///
    /// This metric is tagged with:
    /// - `type`: The type of limiter executed, `cached` or `consistent`.
    /// - `unit`: The item/unit of work which is being rate limited, only available for new
    ///   processing pipelines.
    EventProcessingRateLimiting,
    /// Time in milliseconds spent in data scrubbing for the current event. Data scrubbing happens
    /// last before serializing the event back to JSON.
    EventProcessingPii,
    /// Time spent converting the event from its in-memory representation into a JSON string.
    EventProcessingSerialization,
    /// Time used to extract span metrics from an event.
    EventProcessingSpanMetricsExtraction,
    /// Time spent between the start of request handling and processing of the envelope.
    ///
    /// This includes streaming the request body, scheduling overheads, project config fetching,
    /// batched requests and congestions in the internal processor. This does not include delays in
    /// the incoming request (body upload) and skips all envelopes that are fast-rejected.
    EnvelopeWaitTime,
    /// Time in milliseconds spent in synchronous processing of envelopes.
    ///
    /// This timing covers the end-to-end processing in the CPU pool and comprises:
    ///
    /// - `event_processing.deserialize`
    /// - `event_processing.pii`
    /// - `event_processing.serialization`
    ///
    /// With Relay in processing mode, this also includes the following timings:
    ///
    /// - `event_processing.process`
    /// - `event_processing.filtering`
    /// - `event_processing.rate_limiting`
    EnvelopeProcessingTime,
    /// Total time in milliseconds an envelope spends in Relay from the time it is received until it
    /// finishes processing and has been submitted to the upstream.
    EnvelopeTotalTime,
    /// Latency of project config updates until they reach Relay.
    ///
    /// The metric is calculated by using the creation timestamp of the project config
    /// and when Relay updates its local cache with the new project config.
    ///
    /// No metric is emitted when Relay fetches a project config for the first time.
    ///
    /// This metric is tagged with:
    /// - `delay`: Bucketed amount of seconds passed between fetches.
    ProjectCacheUpdateLatency,
    /// Total time spent from starting to fetch a project config update to completing the fetch.
    ProjectCacheFetchDuration,
    /// Total time in milliseconds spent fetching queued project configuration updates requests to
    /// resolve.
    ///
    /// Relay updates projects in batches. Every update cycle, Relay requests
    /// `limits.max_concurrent_queries * cache.batch_size` projects from the upstream. This metric
    /// measures the wall clock time for all concurrent requests in this loop.
    ///
    /// Note that after an update loop has completed, there may be more projects pending updates.
    /// This is indicated by `project_state.pending`.
    ProjectStateRequestDuration,
    /// Time in milliseconds required to decompress a project config from redis.
    ///
    /// Note that this also times the cases where the project config is uncompressed,
    /// in which case the timer should be very close to zero.
    #[cfg(feature = "processing")]
    ProjectStateDecompression,
    /// Total duration in milliseconds for handling inbound web requests until the HTTP response is
    /// returned to the client.
    ///
    /// This does **not** correspond to the full event ingestion time. Requests for events that are
    /// not immediately rejected due to bad data or cached rate limits always return `200 OK`. Full
    /// validation and normalization occur asynchronously, which is reported by
    /// `event.processing_time`.
    ///
    /// This metric is tagged with:
    /// - `method`: The HTTP method of the request.
    /// - `route`: Unique dashed identifier of the endpoint.
    RequestsDuration,
    /// Time spent on minidump scrubbing.
    ///
    /// This is the total time spent on parsing and scrubbing the minidump. Even if no PII
    /// scrubbing rules applied, the minidump is still parsed and the rules are evaluated on
    /// the parsed minidump; in that case, the duration is reported here with a status of "n/a".
    ///
    /// This metric is tagged with:
    ///
    /// - `status`: Scrubbing status: "ok" means scrubbing was successful, "error" means there
    ///   was an error during scrubbing, and finally "n/a" means scrubbing was successful
    ///   but no scrubbing rules applied.
    MinidumpScrubbing,
    /// Time spent on view hierarchy scrubbing.
    ///
    /// This is the total time spent on parsing and scrubbing the view hierarchy json file.
    ///
    /// This metric is tagged with:
    ///
    /// - `status`: "ok" means scrubbing was successful, "error" means there was an error during
    ///   scrubbing.
    ViewHierarchyScrubbing,
    /// Time spent on attachment scrubbing.
    ///
    /// This represents the total time spent on evaluating the scrubbing rules for an
    /// attachment and the attachment scrubbing itself, regardless of whether any rules were
    /// applied. Note that minidumps which failed to be parsed (status="error" in
    /// scrubbing.minidumps.duration) will be scrubbed as plain attachments and count
    /// towards this.
    ///
    /// This metric is tagged with:
    ///
    /// - `attachment_type`: The type of attachment, e.g. "minidump".
    AttachmentScrubbing,
    /// Total time spent to send a request to the upstream Relay and handle the response.
    ///
    /// This metric is tagged with:
    ///
    /// - `result`: What happened to the request, an enumeration with the following values:
    ///   * `success`: The request was sent and returned a success code `HTTP 2xx`
    ///   * `response_error`: The request was sent and it returned an HTTP error.
    ///   * `payload_failed`: The request was sent but there was an error in interpreting the response.
    ///   * `send_failed`: Failed to send the request due to a network error.
    ///   * `rate_limited`: The request was rate limited.
    ///   * `invalid_json`: The response could not be parsed back into JSON.
    /// - `route`: The endpoint that was called on the upstream.
    /// - `status-code`: The status code of the request when available, otherwise "-".
    /// - `retries`: Number of retries, bucketed as 0, 1, 2, few (3-10), or many (more than 10).
    UpstreamRequestsDuration,
    /// The delay between the timestamp stated in a payload and the receive time.
    ///
    /// SDKs cannot transmit payloads immediately in all cases. Sometimes, crashes require that
    /// events are sent after restarting the application. Similarly, SDKs buffer events during
    /// network downtimes for later transmission. This metric measures the delay between the time of
    /// the event and the time it arrives in Relay. The delay is measured after clock drift
    /// correction is applied.
    ///
    /// Only payloads with a delay of more than 1 minute are captured.
    ///
    /// This metric is tagged with:
    ///
    /// - `category`: The data category of the payload. Can be one of: `event`, `transaction`,
    ///   `security`, or `session`.
    TimestampDelay,
    /// The time it takes the outcome aggregator to flush aggregated outcomes.
    OutcomeAggregatorFlushTime,
    /// Time in milliseconds spent on parsing, normalizing and scrubbing replay recordings.
    ReplayRecordingProcessing,
    /// Total time spent to send a request and receive the response from upstream.
    GlobalConfigRequestDuration,
    /// Timing in milliseconds for processing a message in the internal CPU pool.
    ///
    /// This metric is tagged with:
    ///
    /// - `message`: The type of message that was processed.
    ProcessMessageDuration,
    /// Timing in milliseconds for processing a task in the project cache service.
    ///
    /// This metric is tagged with:
    /// - `task`: The type of the task the project cache does.
    ProjectCacheTaskDuration,
    /// Timing in milliseconds for handling and responding to a health check request.
    ///
    /// This metric is tagged with:
    /// - `type`: The type of the health check, `liveness` or `readiness`.
    HealthCheckDuration,
    /// Temporary timing metric for how much time was spent evaluating span and transaction
    /// rate limits using the `RateLimitBuckets` message in the processor.
    ///
    /// This metric is tagged with:
    /// - `category`: The data category evaluated.
    /// - `limited`: Whether the batch is rate limited.
    /// - `count`: How many items matching the data category are contained in the batch.
    #[cfg(feature = "processing")]
    RateLimitBucketsDuration,
    /// Timing in milliseconds for processing a task in the aggregator service.
    ///
    /// This metric is tagged with:
    /// - `task`: The task being executed by the aggregator.
    /// - `aggregator`: The name of the aggregator.
    AggregatorServiceDuration,
    /// Timing in milliseconds for processing a message in the metric router service.
    ///
    /// This metric is tagged with:
    /// - `message`: The type of message that was processed.
    MetricRouterServiceDuration,
    /// Timing in milliseconds for processing a message in the metric store service.
    ///
    /// This metric is tagged with:
    /// - `message`: The type of message that was processed.
    #[cfg(feature = "processing")]
    StoreServiceDuration,
    /// Timing in milliseconds for the time it takes to initialize the buffer.
    BufferInitialization,
    /// Timing in milliseconds for the time it takes for the buffer to pack & spool a batch.
    ///
    /// Contains the time it takes to pack multiple envelopes into a single memory blob.
    BufferSpool,
    /// Timing in milliseconds for the time it takes for the buffer to spool data to SQLite.
    BufferSqlWrite,
    /// Timing in milliseconds for the time it takes for the buffer to unspool data from disk.
    BufferUnspool,
    /// Timing in milliseconds for the time it takes for the buffer to push.
    BufferPush,
    /// Timing in milliseconds for the time it takes for the buffer to peek.
    BufferPeek,
    /// Timing in milliseconds for the time it takes for the buffer to pop.
    BufferPop,
    /// Timing in milliseconds for the time it takes for the buffer to drain its envelopes.
    BufferDrain,
    /// Timing in milliseconds for the time it takes for an envelope to be serialized.
    BufferEnvelopesSerialization,
    /// Timing in milliseconds for the time it takes for an envelope to be compressed.
    BufferEnvelopeCompression,
    /// Timing in milliseconds for the time it takes for an envelope to be decompressed.
    BufferEnvelopeDecompression,
    /// Timing in milliseconds to count spans in a serialized transaction payload.
    CheckNestedSpans,
    /// The time it takes to create a signature. Includes both the signature used for
    /// trusted relays and for register challenges.
    SignatureCreationDuration,
    /// Time needed to upload an attachment to objectstore.
    ///
    /// Tagged by:
    /// - `type`: "envelope" or "attachment_v2".
    #[cfg(feature = "processing")]
    AttachmentUploadDuration,
}

impl TimerMetric for RelayTimers {
    fn name(&self) -> &'static str {
        match self {
            RelayTimers::EventProcessingDeserialize => "event_processing.deserialize",
            RelayTimers::EventProcessingNormalization => "event_processing.normalization",
            RelayTimers::EventProcessingFiltering => "event_processing.filtering",
            RelayTimers::EventProcessingRateLimiting => "event_processing.rate_limiting",
            RelayTimers::EventProcessingPii => "event_processing.pii",
            RelayTimers::EventProcessingSpanMetricsExtraction => {
                "event_processing.span_metrics_extraction"
            }
            RelayTimers::EventProcessingSerialization => "event_processing.serialization",
            RelayTimers::EnvelopeWaitTime => "event.wait_time",
            RelayTimers::EnvelopeProcessingTime => "event.processing_time",
            RelayTimers::EnvelopeTotalTime => "event.total_time",
            RelayTimers::ProjectStateRequestDuration => "project_state.request.duration",
            #[cfg(feature = "processing")]
            RelayTimers::ProjectStateDecompression => "project_state.decompression",
            RelayTimers::ProjectCacheUpdateLatency => "project_cache.latency",
            RelayTimers::ProjectCacheFetchDuration => "project_cache.fetch.duration",
            RelayTimers::RequestsDuration => "requests.duration",
            RelayTimers::MinidumpScrubbing => "scrubbing.minidumps.duration",
            RelayTimers::ViewHierarchyScrubbing => "scrubbing.view_hierarchy_scrubbing.duration",
            RelayTimers::AttachmentScrubbing => "scrubbing.attachments.duration",
            RelayTimers::UpstreamRequestsDuration => "upstream.requests.duration",
            RelayTimers::TimestampDelay => "requests.timestamp_delay",
            RelayTimers::OutcomeAggregatorFlushTime => "outcomes.aggregator.flush_time",
            RelayTimers::ReplayRecordingProcessing => "replay.recording.process",
            RelayTimers::GlobalConfigRequestDuration => "global_config.requests.duration",
            RelayTimers::ProcessMessageDuration => "processor.message.duration",
            RelayTimers::ProjectCacheTaskDuration => "project_cache.task.duration",
            RelayTimers::HealthCheckDuration => "health.message.duration",
            #[cfg(feature = "processing")]
            RelayTimers::RateLimitBucketsDuration => "processor.rate_limit_buckets",
            RelayTimers::AggregatorServiceDuration => "metrics.aggregator.message.duration",
            RelayTimers::MetricRouterServiceDuration => "metrics.router.message.duration",
            #[cfg(feature = "processing")]
            RelayTimers::StoreServiceDuration => "store.message.duration",
            RelayTimers::BufferInitialization => "buffer.initialization.duration",
            RelayTimers::BufferSpool => "buffer.spool.duration",
            RelayTimers::BufferSqlWrite => "buffer.write.duration",
            RelayTimers::BufferUnspool => "buffer.unspool.duration",
            RelayTimers::BufferPush => "buffer.push.duration",
            RelayTimers::BufferPeek => "buffer.peek.duration",
            RelayTimers::BufferPop => "buffer.pop.duration",
            RelayTimers::BufferDrain => "buffer.drain.duration",
            RelayTimers::BufferEnvelopesSerialization => "buffer.envelopes_serialization",
            RelayTimers::BufferEnvelopeCompression => "buffer.envelopes_compression",
            RelayTimers::BufferEnvelopeDecompression => "buffer.envelopes_decompression",
            RelayTimers::CheckNestedSpans => "envelope.check_nested_spans",
            RelayTimers::SignatureCreationDuration => "signature.create.duration",
            #[cfg(feature = "processing")]
            RelayTimers::AttachmentUploadDuration => "attachment.upload.duration",
        }
    }
}
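
// Example (a sketch, not part of the original file): timers defined above can be reported
// by passing a `std::time::Duration` to the `relay_statsd::metric!` macro. The surrounding
// function and the work being timed are illustrative assumptions.
#[allow(dead_code)]
fn example_time_event_serialization<F: FnOnce()>(serialize: F) {
    let start = std::time::Instant::now();
    serialize();
    relay_statsd::metric!(timer(RelayTimers::EventProcessingSerialization) = start.elapsed());
}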

/// Counter metrics used by Relay
pub enum RelayCounters {
    /// Tracks the number of tasks driven to completion by the async pool.
    ///
    /// This metric is tagged with:
    /// - `pool`: the name of the pool.
    AsyncPoolFinishedTasks,
    /// Number of Events that had corrupted (unprintable) event attributes.
    ///
    /// This currently checks for `environment` and `release`, for which we know that
    /// some SDKs may send corrupted values.
    EventCorrupted,
    /// Number of envelopes accepted in the current time slot.
    ///
    /// This represents requests that have successfully passed rate limits and filters, and have
    /// been sent to the upstream.
    ///
    /// This metric is tagged with:
    /// - `handling`: Either `"success"` if the envelope was handled correctly, or `"failure"` if
    ///   there was an error or bug.
    EnvelopeAccepted,
    /// Number of envelopes rejected in the current time slot.
    ///
    /// This includes envelopes being rejected because they are malformed or any other errors during
    /// processing (including filtered events, invalid payloads, and rate limits).
    ///
    /// To check the rejection reason, check `events.outcomes` instead.
    ///
    /// This metric is tagged with:
    /// - `handling`: Either `"success"` if the envelope was handled correctly, or `"failure"` if
    ///   there was an error or bug.
    EnvelopeRejected,
    /// Number of total envelope items we received.
    ///
    /// Note: This does not count raw items; it counts the logical number of items,
    /// e.g. a single item container counts all of its contained items.
    ///
    /// This metric is tagged with:
    /// - `item_type`: The type of the items being counted.
    /// - `is_container`: Whether this item is a container holding multiple items.
    /// - `sdk`: The name of the Sentry SDK sending the envelope. This tag is only set for
    ///   Sentry's SDKs and defaults to "proprietary".
    EnvelopeItems,
    /// Number of bytes we processed per envelope item.
    ///
    /// This metric is tagged with:
    /// - `item_type`: The type of the items being counted.
    /// - `is_container`: Whether this item is a container holding multiple items.
    /// - `sdk`: The name of the Sentry SDK sending the envelope. This tag is only set for
    ///   Sentry's SDKs and defaults to "proprietary".
    EnvelopeItemBytes,
    /// Number of attempts to pop an envelope from the buffer.
    BufferTryPop,
    /// Number of envelopes spooled to disk.
    BufferSpooledEnvelopes,
    /// Number of envelopes unspooled from disk.
    BufferUnspooledEnvelopes,
    /// Number of project change updates received by the buffer.
    BufferProjectChangedEvent,
    /// Number of times one or more projects of an envelope were pending when trying to pop
    /// their envelope.
    BufferProjectPending,
    /// Number of iterations of the envelope buffer service loop.
    BufferServiceLoopIteration,
    /// Number of outcomes and reasons for rejected Envelopes.
    ///
    /// This metric is tagged with:
    /// - `outcome`: The basic cause for rejecting the event.
    /// - `reason`: A more detailed identifier describing the rule or mechanism leading to the
    ///   outcome.
    /// - `to`: Describes the destination of the outcome. Can be either 'kafka' (when in
    ///   processing mode) or 'http' (when outcomes are enabled in an external relay).
    ///
    /// Possible outcomes are:
    /// - `filtered`: Dropped by inbound data filters. The reason specifies the filter that
    ///   matched.
    /// - `rate_limited`: Dropped by organization, project, or DSN rate limit, as well as exceeding
    ///   the Sentry plan quota. The reason contains the rate limit or quota that was exceeded.
    /// - `invalid`: Data was considered invalid and could not be recovered. The reason indicates
    ///   the validation that failed.
    Outcomes,
    /// The number of individual outcomes including their quantity.
    ///
    /// While [`RelayCounters::Outcomes`] tracks the number of times aggregated outcomes
    /// have been emitted, this counter tracks the total quantity of individual outcomes.
    OutcomeQuantity,
    /// Number of project state HTTP requests.
    ///
    /// Relay updates projects in batches. Every update cycle, Relay requests
    /// `limits.max_concurrent_queries` batches of `cache.batch_size` projects from the upstream.
    /// The duration of these requests is reported via `project_state.request.duration`.
    ///
    /// Note that after an update loop has completed, there may be more projects pending updates.
    /// This is indicated by `project_state.pending`.
    ProjectStateRequest,
    /// Number of times a project state is requested from the central Redis cache.
    ///
    /// This metric is tagged with:
    /// - `hit`: One of:
    ///   - `revision`: the cached version was validated to be up to date using its revision.
    ///   - `project_config`: the request was handled by the cache.
    ///   - `project_config_revision`: the request was handled by the cache and the revision did
    ///     not change.
    ///   - `false`: the request will be sent to the sentry endpoint.
    #[cfg(feature = "processing")]
    ProjectStateRedis,
    /// Number of times a project had a fetch scheduled.
    ProjectCacheSchedule,
    /// Number of times an upstream request for a project config is completed.
    ///
    /// Completion can be because a result was returned or because the config request was
    /// dropped because there was still no response after a timeout. This metric has tags
    /// for `result` and `attempts`, indicating whether it was successful or a timeout and how
    /// many attempts were made, respectively.
    ProjectUpstreamCompleted,
    /// Number of times an upstream request for a project config failed.
    ///
    /// Failure can happen, for example, when there's a network error. Refer to
    /// [`UpstreamRequestError`](crate::services::upstream::UpstreamRequestError) for all cases.
    ProjectUpstreamFailed,
    /// Number of Relay server starts.
    ///
    /// This can be used to track unwanted restarts due to crashes or termination.
    ServerStarting,
    /// Number of messages placed on the Kafka queues.
    ///
    /// When Relay operates as a Sentry service and an Envelope item is successfully processed, each
    /// Envelope item results in a dedicated message on one of the ingestion topics on Kafka.
    ///
    /// This metric is tagged with:
    /// - `event_type`: The kind of message produced to Kafka.
    /// - `namespace` (only for metrics): The namespace that the metric belongs to.
    /// - `is_segment` (only for event_type span): `true` if the span is the root of a segment.
    /// - `has_parent` (only for event_type span): `false` if the span is the root of a trace.
    /// - `platform` (only for event_type span): The platform from which the span was sent.
    /// - `metric_type` (only for event_type metric): The metric type, counter, distribution,
    ///   gauge or set.
    /// - `metric_encoding` (only for event_type metric): The encoding used for distribution and
    ///   set metrics.
    ///
    /// The message types can be:
    ///
    /// - `event`: An error or transaction event. Error events are sent to `ingest-events`,
    ///   transactions to `ingest-transactions`, and errors with attachments are sent to
    ///   `ingest-attachments`.
    /// - `attachment`: An attachment file associated with an error event, sent to
    ///   `ingest-attachments`.
    /// - `user_report`: A message from the user feedback dialog, sent to `ingest-events`.
    /// - `session`: A release health session update, sent to `ingest-sessions`.
    #[cfg(feature = "processing")]
    ProcessingMessageProduced,
    /// Number of spans produced in the new format.
    #[cfg(feature = "processing")]
    SpanV2Produced,
    /// Number of events that hit any of the store-like endpoints: Envelope, Store, Security,
    /// Minidump, Unreal.
    ///
    /// The events are counted before they are rate limited, filtered, or processed in any way.
    ///
    /// This metric is tagged with:
    /// - `version`: The event protocol version number defaulting to `7`.
    EventProtocol,
    /// The number of transaction events processed by the source of the transaction name.
    ///
    /// This metric is tagged with:
    /// - `platform`: The event's platform, such as `"javascript"`.
    /// - `source`: The source of the transaction name on the client. See the [transaction source
    ///   documentation](https://develop.sentry.dev/sdk/event-payloads/properties/transaction_info/)
    ///   for all valid values.
    /// - `contains_slashes`: Whether the transaction name contains `/`. We use this as a heuristic
    ///   to identify URL transactions.
    EventTransaction,
    /// The number of transaction events processed grouped by transaction name modifications.
    ///
    /// This metric is tagged with:
    /// - `source_in`: The source of the transaction name before normalization.
    ///   See the [transaction source
    ///   documentation](https://develop.sentry.dev/sdk/event-payloads/properties/transaction_info/)
    ///   for all valid values.
    /// - `change`: The mechanism that changed the transaction name.
    ///   Either `"none"`, `"pattern"`, `"rule"`, or `"both"`.
    /// - `source_out`: The source of the transaction name after normalization.
    TransactionNameChanges,
    /// Number of HTTP requests reaching Relay.
    Requests,
    /// Number of completed HTTP requests.
    ///
    /// This metric is tagged with:
    ///
    /// - `status_code`: The HTTP status code number.
    /// - `method`: The HTTP method used in the request in uppercase.
    /// - `route`: Unique dashed identifier of the endpoint.
    ResponsesStatusCodes,
    /// Number of evicted stale projects from the cache.
    ///
    /// Relay scans the in-memory project cache for stale entries in a regular interval configured
    /// by `cache.eviction_interval`.
    ///
    /// The cache duration for project states can be configured with the following options:
    ///
    /// - `cache.project_expiry`: The time after which a project state counts as expired. It is
    ///   automatically refreshed if a request references the project after it has expired.
    /// - `cache.project_grace_period`: The time after expiry at which the project state will still
    ///   be used to ingest events. Once the grace period expires, the cache is evicted and new
    ///   requests wait for an update.
    EvictingStaleProjectCaches,
    /// Number of refreshes for stale projects in the cache.
    RefreshStaleProjectCaches,
    /// Number of times that parsing a metrics bucket item from an envelope failed.
    MetricBucketsParsingFailed,
    /// Count extraction of transaction names. Tag with the decision to drop / replace / use original.
    MetricsTransactionNameExtracted,
    /// Number of Events with an OpenTelemetry Context
    ///
    /// This metric is tagged with:
    /// - `platform`: The event's platform, such as `"javascript"`.
    /// - `sdk`: The name of the Sentry SDK sending the transaction. This tag is only set for
    ///   Sentry's SDKs and defaults to "proprietary".
    OpenTelemetryEvent,
    /// Number of global config fetches from upstream. Only 2XX responses are
    /// counted; send errors (e.g. auth or network errors) are ignored.
    ///
    /// This metric is tagged with:
    /// - `success`: whether deserializing the global config succeeded.
    GlobalConfigFetched,
    /// The number of attachments processed in the same envelope as a user_report_v2 event.
    FeedbackAttachments,
    /// All COGS tracked values.
    ///
    /// This metric is tagged with:
    /// - `resource_id`: The COGS resource id.
    /// - `app_feature`: The COGS app feature.
    CogsUsage,
    /// The number of times metrics of a project have been flushed without the project being
    /// fetched/available.
    ProjectStateFlushMetricsNoProject,
    /// Incremented every time a bucket is dropped.
    ///
    /// This should only happen when a project state is invalid during graceful shutdown.
    ///
    /// This metric is tagged with:
    /// - `aggregator`: The name of the metrics aggregator (usually `"default"`).
    BucketsDropped,
    /// Incremented every time a segment exceeds the expected limit.
    ReplayExceededSegmentLimit,
    /// Incremented every time the server accepts a new connection.
    ServerSocketAccept,
    /// Incremented every time the server aborts a connection because of an idle timeout.
    ServerConnectionIdleTimeout,
    /// The total delay of metric buckets in seconds.
    ///
    /// The delay is measured from initial creation of the bucket in an internal Relay
    /// until it is produced to Kafka.
    ///
    /// Use [`Self::MetricDelayCount`] to calculate the average delay
    /// (`metrics.delay.sum / metrics.delay.count`).
    ///
    /// This metric is tagged with:
    /// - `namespace`: the metric namespace.
    #[cfg(feature = "processing")]
    MetricDelaySum,
    /// The number of buckets counted for the [`Self::MetricDelaySum`] metric.
    ///
    /// This metric is tagged with:
    /// - `namespace`: the metric namespace.
    #[cfg(feature = "processing")]
    MetricDelayCount,
    /// The number of times PlayStation processing was attempted.
    #[cfg(all(sentry, feature = "processing"))]
    PlaystationProcessing,
    /// The number of times a sampling decision was made.
    ///
    /// This metric is tagged with:
    /// - `item`: what item the decision is taken for (transaction vs span).
    SamplingDecision,
    /// The number of times an upload of an attachment occurs.
    ///
    /// This metric is tagged with:
    /// - `result`: `success` or the failure reason.
    /// - `type`: `envelope` or `attachment_v2`.
    #[cfg(feature = "processing")]
    AttachmentUpload,
    /// Whether a logs envelope has a trace context header or not.
    ///
    /// This metric is tagged with:
    /// - `dsc`: yes or no.
    /// - `sdk`: low-cardinality client name.
    EnvelopeWithLogs,
}

impl CounterMetric for RelayCounters {
    fn name(&self) -> &'static str {
        match self {
            RelayCounters::AsyncPoolFinishedTasks => "async_pool.finished_tasks",
            RelayCounters::EventCorrupted => "event.corrupted",
            RelayCounters::EnvelopeAccepted => "event.accepted",
            RelayCounters::EnvelopeRejected => "event.rejected",
            RelayCounters::EnvelopeItems => "event.items",
            RelayCounters::EnvelopeItemBytes => "event.item_bytes",
            RelayCounters::BufferTryPop => "buffer.try_pop",
            RelayCounters::BufferSpooledEnvelopes => "buffer.spooled_envelopes",
            RelayCounters::BufferUnspooledEnvelopes => "buffer.unspooled_envelopes",
            RelayCounters::BufferProjectChangedEvent => "buffer.project_changed_event",
            RelayCounters::BufferProjectPending => "buffer.project_pending",
            RelayCounters::BufferServiceLoopIteration => "buffer.service_loop_iteration",
            RelayCounters::Outcomes => "events.outcomes",
            RelayCounters::OutcomeQuantity => "events.outcome_quantity",
            RelayCounters::ProjectStateRequest => "project_state.request",
            #[cfg(feature = "processing")]
            RelayCounters::ProjectStateRedis => "project_state.redis.requests",
            RelayCounters::ProjectUpstreamCompleted => "project_upstream.completed",
            RelayCounters::ProjectUpstreamFailed => "project_upstream.failed",
            RelayCounters::ProjectCacheSchedule => "project_cache.schedule",
            RelayCounters::ServerStarting => "server.starting",
            #[cfg(feature = "processing")]
            RelayCounters::ProcessingMessageProduced => "processing.event.produced",
            #[cfg(feature = "processing")]
            RelayCounters::SpanV2Produced => "store.produced.span_v2",
            RelayCounters::EventProtocol => "event.protocol",
            RelayCounters::EventTransaction => "event.transaction",
            RelayCounters::TransactionNameChanges => "event.transaction_name_changes",
            RelayCounters::Requests => "requests",
            RelayCounters::ResponsesStatusCodes => "responses.status_codes",
            RelayCounters::EvictingStaleProjectCaches => "project_cache.eviction",
            RelayCounters::RefreshStaleProjectCaches => "project_cache.refresh",
            RelayCounters::MetricBucketsParsingFailed => "metrics.buckets.parsing_failed",
            RelayCounters::MetricsTransactionNameExtracted => "metrics.transaction_name",
            RelayCounters::OpenTelemetryEvent => "event.opentelemetry",
            RelayCounters::GlobalConfigFetched => "global_config.fetch",
            RelayCounters::FeedbackAttachments => "processing.feedback_attachments",
            RelayCounters::CogsUsage => "cogs.usage",
            RelayCounters::ProjectStateFlushMetricsNoProject => "project_state.metrics.no_project",
            RelayCounters::BucketsDropped => "metrics.buckets.dropped",
            RelayCounters::ReplayExceededSegmentLimit => "replay.segment_limit_exceeded",
            RelayCounters::ServerSocketAccept => "server.http.accepted",
            RelayCounters::ServerConnectionIdleTimeout => "server.http.idle_timeout",
            #[cfg(feature = "processing")]
            RelayCounters::MetricDelaySum => "metrics.delay.sum",
            #[cfg(feature = "processing")]
            RelayCounters::MetricDelayCount => "metrics.delay.count",
            #[cfg(all(sentry, feature = "processing"))]
            RelayCounters::PlaystationProcessing => "processing.playstation",
            RelayCounters::SamplingDecision => "sampling.decision",
            #[cfg(feature = "processing")]
            RelayCounters::AttachmentUpload => "attachment.upload",
            RelayCounters::EnvelopeWithLogs => "logs.envelope",
        }
    }
}
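
// Example (a sketch, not part of the original file): counters defined above are incremented
// through the `relay_statsd::metric!` macro. The call site and the `handling` tag value
// are illustrative assumptions.
#[allow(dead_code)]
fn example_count_accepted_envelope() {
    relay_statsd::metric!(counter(RelayCounters::EnvelopeAccepted) += 1, handling = "success");
}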