relay_server/
statsd.rs

1use relay_statsd::{CounterMetric, DistributionMetric, GaugeMetric, TimerMetric};
2#[cfg(doc)]
3use relay_system::RuntimeMetrics;
4
/// Gauge metrics used by Relay.
pub enum RelayGauges {
    /// Tracks the number of futures waiting to be executed in the pool's queue.
    ///
    /// Useful for understanding the backlog of work and identifying potential bottlenecks.
    ///
    /// This metric is tagged with:
    /// - `pool`: the name of the pool.
    AsyncPoolQueueSize,
    /// Tracks the utilization of the async pool.
    ///
    /// The utilization is a value between 0.0 and 100.0 which indicates how busy the pool is doing
    /// CPU-bound work.
    ///
    /// This metric is tagged with:
    /// - `pool`: the name of the pool.
    AsyncPoolUtilization,
    /// Tracks the activity of the async pool.
    ///
    /// The activity is a value between 0.0 and 100.0 which indicates how busy the pool is with
    /// respect to its provisioned capacity.
    ///
    /// This metric is tagged with:
    /// - `pool`: the name of the pool.
    AsyncPoolActivity,
    /// The state of Relay with respect to the upstream connection.
    ///
    /// Possible values are `0` for normal operations and `1` for a network outage.
    NetworkOutage,
    /// The number of individual stacks in the priority queue.
    ///
    /// A new stack is created for each combination of `(own_key, sampling_key)`.
    BufferStackCount,
    /// The disk space used by the buffer.
    BufferDiskUsed,
    /// The memory currently used by the entire system.
    ///
    /// Relay uses the same value for its memory health check.
    SystemMemoryUsed,
    /// The total system memory.
    ///
    /// Relay uses the same value for its memory health check.
    SystemMemoryTotal,
    /// The number of connections currently being managed by the Redis pool.
    #[cfg(feature = "processing")]
    RedisPoolConnections,
    /// The number of idle connections in the Redis pool.
    #[cfg(feature = "processing")]
    RedisPoolIdleConnections,
    /// The maximum number of connections in the Redis pool.
    #[cfg(feature = "processing")]
    RedisPoolMaxConnections,
    /// The number of futures waiting to grab a connection from the Redis pool.
    #[cfg(feature = "processing")]
    RedisPoolWaitingForConnection,
    /// The number of notifications in the broadcast channel of the project cache.
    ProjectCacheNotificationChannel,
    /// The number of scheduled and in-progress fetches in the project cache.
    ProjectCacheScheduledFetches,
    /// Exposes the number of connections currently open and being handled by the server.
    ServerActiveConnections,
    /// Maximum delay of a metric bucket in seconds.
    ///
    /// The maximum is measured from initial creation of the bucket in an internal Relay
    /// until it is produced to Kafka.
    ///
    /// This metric is tagged with:
    /// - `namespace`: the metric namespace.
    #[cfg(feature = "processing")]
    MetricDelayMax,
    /// Estimated percentage [0-100] of how busy Relay's internal services are.
    ///
    /// This metric is tagged with:
    /// - `service`: the service name.
    /// - `instance_id`: an identifier for the running service that is unique per service name.
    ServiceUtilization,
    /// Number of attachment uploads currently in flight.
    #[cfg(feature = "processing")]
    ConcurrentAttachmentUploads,
}
84
85impl GaugeMetric for RelayGauges {
86    fn name(&self) -> &'static str {
87        match self {
88            RelayGauges::AsyncPoolQueueSize => "async_pool.queue_size",
89            RelayGauges::AsyncPoolUtilization => "async_pool.utilization",
90            RelayGauges::AsyncPoolActivity => "async_pool.activity",
91            RelayGauges::NetworkOutage => "upstream.network_outage",
92            RelayGauges::BufferStackCount => "buffer.stack_count",
93            RelayGauges::BufferDiskUsed => "buffer.disk_used",
94            RelayGauges::SystemMemoryUsed => "health.system_memory.used",
95            RelayGauges::SystemMemoryTotal => "health.system_memory.total",
96            #[cfg(feature = "processing")]
97            RelayGauges::RedisPoolConnections => "redis.pool.connections",
98            #[cfg(feature = "processing")]
99            RelayGauges::RedisPoolIdleConnections => "redis.pool.idle_connections",
100            #[cfg(feature = "processing")]
101            RelayGauges::RedisPoolMaxConnections => "redis.pool.max_connections",
102            #[cfg(feature = "processing")]
103            RelayGauges::RedisPoolWaitingForConnection => "redis.pool.waiting_for_connection",
104            RelayGauges::ProjectCacheNotificationChannel => {
105                "project_cache.notification_channel.size"
106            }
107            RelayGauges::ProjectCacheScheduledFetches => "project_cache.fetches.size",
108            RelayGauges::ServerActiveConnections => "server.http.connections",
109            #[cfg(feature = "processing")]
110            RelayGauges::MetricDelayMax => "metrics.delay.max",
111            RelayGauges::ServiceUtilization => "service.utilization",
112            #[cfg(feature = "processing")]
113            RelayGauges::ConcurrentAttachmentUploads => "attachment.upload.concurrent",
114        }
115    }
116}
117
/// Gauge metrics collected from the Runtime.
///
/// Each variant exposes the [`RuntimeMetrics`] accessor referenced in its documentation.
pub enum RuntimeGauges {
    /// Exposes [`RuntimeMetrics::num_idle_threads`].
    NumIdleThreads,
    /// Exposes [`RuntimeMetrics::num_alive_tasks`].
    NumAliveTasks,
    /// Exposes [`RuntimeMetrics::blocking_queue_depth`].
    BlockingQueueDepth,
    /// Exposes [`RuntimeMetrics::num_blocking_threads`].
    NumBlockingThreads,
    /// Exposes [`RuntimeMetrics::num_idle_blocking_threads`].
    NumIdleBlockingThreads,
    /// Exposes [`RuntimeMetrics::num_workers`].
    NumWorkers,
    /// Exposes [`RuntimeMetrics::worker_local_queue_depth`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerLocalQueueDepth,
    /// Exposes [`RuntimeMetrics::worker_mean_poll_time`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerMeanPollTime,
}
143
144impl GaugeMetric for RuntimeGauges {
145    fn name(&self) -> &'static str {
146        match self {
147            RuntimeGauges::NumIdleThreads => "runtime.idle_threads",
148            RuntimeGauges::NumAliveTasks => "runtime.alive_tasks",
149            RuntimeGauges::BlockingQueueDepth => "runtime.blocking_queue_depth",
150            RuntimeGauges::NumBlockingThreads => "runtime.num_blocking_threads",
151            RuntimeGauges::NumIdleBlockingThreads => "runtime.num_idle_blocking_threads",
152            RuntimeGauges::NumWorkers => "runtime.num_workers",
153            RuntimeGauges::WorkerLocalQueueDepth => "runtime.worker_local_queue_depth",
154            RuntimeGauges::WorkerMeanPollTime => "runtime.worker_mean_poll_time",
155        }
156    }
157}
158
/// Counter metrics collected from the Runtime.
///
/// Each variant exposes the [`RuntimeMetrics`] accessor referenced in its documentation.
pub enum RuntimeCounters {
    /// Exposes [`RuntimeMetrics::budget_forced_yield_count`].
    BudgetForcedYieldCount,
    /// Exposes [`RuntimeMetrics::worker_local_schedule_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerLocalScheduleCount,
    /// Exposes [`RuntimeMetrics::worker_noop_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerNoopCount,
    /// Exposes [`RuntimeMetrics::worker_overflow_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerOverflowCount,
    /// Exposes [`RuntimeMetrics::worker_park_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerParkCount,
    /// Exposes [`RuntimeMetrics::worker_poll_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerPollCount,
    /// Exposes [`RuntimeMetrics::worker_steal_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerStealCount,
    /// Exposes [`RuntimeMetrics::worker_steal_operations`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerStealOperations,
    /// Exposes [`RuntimeMetrics::worker_total_busy_duration`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerTotalBusyDuration,
}
204
205impl CounterMetric for RuntimeCounters {
206    fn name(&self) -> &'static str {
207        match self {
208            RuntimeCounters::BudgetForcedYieldCount => "runtime.budget_forced_yield_count",
209            RuntimeCounters::WorkerLocalScheduleCount => "runtime.worker_local_schedule_count",
210            RuntimeCounters::WorkerNoopCount => "runtime.worker_noop_count",
211            RuntimeCounters::WorkerOverflowCount => "runtime.worker_overflow_count",
212            RuntimeCounters::WorkerParkCount => "runtime.worker_park_count",
213            RuntimeCounters::WorkerPollCount => "runtime.worker_poll_count",
214            RuntimeCounters::WorkerStealCount => "runtime.worker_steal_count",
215            RuntimeCounters::WorkerStealOperations => "runtime.worker_steal_operations",
216            RuntimeCounters::WorkerTotalBusyDuration => "runtime.worker_total_busy_duration",
217        }
218    }
219}
220
/// Distribution (histogram) metrics used by Relay.
pub enum RelayDistributions {
    /// The number of bytes received by Relay for each individual envelope item type.
    ///
    /// This metric is tagged with:
    ///  - `item_type`: The type of the items being counted.
    ///  - `is_container`: Whether this item is a container holding multiple items.
    EnvelopeItemSize,

    /// Number of elements in the envelope buffer across all the stacks.
    ///
    /// This metric is tagged with:
    /// - `storage_type`: The type of storage used in the envelope buffer.
    BufferEnvelopesCount,
    /// The number of bytes in the item payloads of an envelope pushed to the envelope buffer.
    ///
    /// This is not quite the same as the actual size of a serialized envelope, because it ignores
    /// the envelope header and item headers.
    BufferEnvelopeBodySize,
    /// Size of a serialized envelope pushed to the envelope buffer.
    BufferEnvelopeSize,
    /// Size of a compressed envelope pushed to the envelope buffer.
    BufferEnvelopeSizeCompressed,
    /// The number of batches emitted per partition.
    BatchesPerPartition,
    /// The number of buckets in an emitted batch.
    ///
    /// This corresponds to the number of buckets that will end up in an envelope.
    BucketsPerBatch,
    /// The number of spans per processed transaction event.
    ///
    /// This metric is tagged with:
    ///  - `platform`: The event's platform, such as `"javascript"`.
    ///  - `sdk`: The name of the Sentry SDK sending the transaction. This tag is only set for
    ///    Sentry's SDKs and defaults to "proprietary".
    EventSpans,
    /// Number of projects in the in-memory project cache that are waiting for their state to be
    /// updated.
    ///
    /// See `project_cache.size` for more description of the project cache.
    ProjectStatePending,
    /// Number of project states **requested** from the upstream for each batch request.
    ///
    /// If multiple batches are updated concurrently, this metric is reported multiple times.
    ///
    /// The batch size can be configured with `cache.batch_size`. See `project_cache.size` for more
    /// description of the project cache.
    ProjectStateRequestBatchSize,
    /// Number of project states **returned** from the upstream for each batch request.
    ///
    /// If multiple batches are updated concurrently, this metric is reported multiple times.
    ///
    /// See `project_cache.size` for more description of the project cache.
    ProjectStateReceived,
    /// Number of attempts required to fetch the config for a given project key.
    ProjectStateAttempts,
    /// Number of project states currently held in the in-memory project cache.
    ///
    /// The cache duration for project states can be configured with the following options:
    ///
    ///  - `cache.project_expiry`: The time after which a project state counts as expired. It is
    ///    automatically refreshed if a request references the project after it has expired.
    ///  - `cache.project_grace_period`: The time after expiry at which the project state will still
    ///    be used to ingest events. Once the grace period expires, the cache is evicted and new
    ///    requests wait for an update.
    ///
    /// There is no limit to the number of cached projects.
    ProjectStateCacheSize,
    /// The size of the compressed project config in the redis cache, in bytes.
    #[cfg(feature = "processing")]
    ProjectStateSizeBytesCompressed,
    /// The size of the uncompressed project config in the redis cache, in bytes.
    #[cfg(feature = "processing")]
    ProjectStateSizeBytesDecompressed,
    /// The number of upstream requests queued up for sending.
    ///
    /// Relay employs connection keep-alive whenever possible. Connections are kept open for _15_
    /// seconds of inactivity or _75_ seconds of activity. If all connections are busy, they are
    /// queued, which is reflected in this metric.
    ///
    /// This metric is tagged with:
    ///  - `priority`: The queueing priority of the request, either `"high"` or `"low"`. The
    ///    priority determines precedence in executing requests.
    ///
    /// The number of concurrent connections can be configured with:
    ///  - `limits.max_concurrent_requests` for the overall number of connections
    ///  - `limits.max_concurrent_queries` for the number of concurrent high-priority requests
    UpstreamMessageQueueSize,
    /// Counts the number of retries for each upstream HTTP request.
    ///
    /// This metric is tagged with:
    ///
    ///   - `result`: What happened to the request, an enumeration with the following values:
    ///     * `success`: The request was sent and returned a success code `HTTP 2xx`
    ///     * `response_error`: The request was sent and it returned an HTTP error.
    ///     * `payload_failed`: The request was sent but there was an error in interpreting the response.
    ///     * `send_failed`: Failed to send the request due to a network error.
    ///     * `rate_limited`: The request was rate limited.
    ///     * `invalid_json`: The response could not be parsed back into JSON.
    ///   - `route`: The endpoint that was called on the upstream.
    ///   - `status-code`: The status code of the request when available, otherwise "-".
    UpstreamRetries,
    /// Size of queries (projectconfig queries, i.e. the request payload, not the response) sent by
    /// Relay over HTTP in bytes.
    UpstreamQueryBodySize,
    /// Size of envelopes sent over HTTP in bytes.
    UpstreamEnvelopeBodySize,
    /// Size of batched global metrics requests sent by Relay over HTTP in bytes.
    UpstreamMetricsBodySize,
    /// Distribution of flush buckets over partition keys.
    ///
    /// The distribution of buckets should be even.
    /// If it is not, this metric should expose it.
    PartitionKeys,
    /// Measures how many splits were performed when sending out a partition.
    PartitionSplits,
}
338
339impl DistributionMetric for RelayDistributions {
340    fn name(&self) -> &'static str {
341        match self {
342            Self::EnvelopeItemSize => "event.item_size",
343            Self::EventSpans => "event.spans",
344            Self::BatchesPerPartition => "metrics.buckets.batches_per_partition",
345            Self::BucketsPerBatch => "metrics.buckets.per_batch",
346            Self::BufferEnvelopesCount => "buffer.envelopes_count",
347            Self::BufferEnvelopeBodySize => "buffer.envelope_body_size",
348            Self::BufferEnvelopeSize => "buffer.envelope_size",
349            Self::BufferEnvelopeSizeCompressed => "buffer.envelope_size.compressed",
350            Self::ProjectStatePending => "project_state.pending",
351            Self::ProjectStateAttempts => "project_state.attempts",
352            Self::ProjectStateRequestBatchSize => "project_state.request.batch_size",
353            Self::ProjectStateReceived => "project_state.received",
354            Self::ProjectStateCacheSize => "project_cache.size",
355            #[cfg(feature = "processing")]
356            Self::ProjectStateSizeBytesCompressed => "project_state.size_bytes.compressed",
357            #[cfg(feature = "processing")]
358            Self::ProjectStateSizeBytesDecompressed => "project_state.size_bytes.decompressed",
359            Self::UpstreamMessageQueueSize => "http_queue.size",
360            Self::UpstreamRetries => "upstream.retries",
361            Self::UpstreamQueryBodySize => "upstream.query.body_size",
362            Self::UpstreamEnvelopeBodySize => "upstream.envelope.body_size",
363            Self::UpstreamMetricsBodySize => "upstream.metrics.body_size",
364            Self::PartitionKeys => "metrics.buckets.partition_keys",
365            Self::PartitionSplits => "partition_splits",
366        }
367    }
368}
369
/// Timer metrics used by Relay.
pub enum RelayTimers {
    /// Time in milliseconds spent deserializing an event from JSON bytes into the native data
    /// structure on which Relay operates.
    EventProcessingDeserialize,
    /// Time in milliseconds spent running normalization on an event. Normalization
    /// happens before envelope filtering and metric extraction.
    EventProcessingNormalization,
    /// Time in milliseconds spent running inbound data filters on an event.
    EventProcessingFiltering,
    /// Time in milliseconds spent checking for organization, project, and DSN rate limits.
    ///
    /// Not all events reach this point. After an event is rate limited for the first time, the rate
    /// limit is cached. Events coming in after this will be discarded earlier in the request queue
    /// and do not reach the processing queue.
    ///
    /// This metric is tagged with:
    ///  - `type`: The type of limiter executed, `cached` or `consistent`.
    ///  - `unit`: The item/unit of work which is being rate limited, only available for new
    ///    processing pipelines.
    EventProcessingRateLimiting,
    /// Time in milliseconds spent in data scrubbing for the current event. Data scrubbing happens
    /// last before serializing the event back to JSON.
    EventProcessingPii,
    /// Time spent converting the event from its in-memory representation into a JSON string.
    EventProcessingSerialization,
    /// Time used to extract span metrics from an event.
    EventProcessingSpanMetricsExtraction,
    /// Time spent between the start of request handling and processing of the envelope.
    ///
    /// This includes streaming the request body, scheduling overheads, project config fetching,
    /// batched requests and congestions in the internal processor. This does not include delays in
    /// the incoming request (body upload) and skips all envelopes that are fast-rejected.
    EnvelopeWaitTime,
    /// Time in milliseconds spent in synchronous processing of envelopes.
    ///
    /// This timing covers the end-to-end processing in the CPU pool and comprises:
    ///
    ///  - `event_processing.deserialize`
    ///  - `event_processing.pii`
    ///  - `event_processing.serialization`
    ///
    /// With Relay in processing mode, this also includes the following timings:
    ///
    ///  - `event_processing.process`
    ///  - `event_processing.filtering`
    ///  - `event_processing.rate_limiting`
    EnvelopeProcessingTime,
    /// Total time in milliseconds an envelope spends in Relay from the time it is received until it
    /// finishes processing and has been submitted to the upstream.
    EnvelopeTotalTime,
    /// Latency of project config updates until they reach Relay.
    ///
    /// The metric is calculated by using the creation timestamp of the project config
    /// and when Relay updates its local cache with the new project config.
    ///
    /// No metric is emitted when Relay fetches a project config for the first time.
    ///
    /// This metric is tagged with:
    ///  - `delay`: Bucketed amount of seconds passed between fetches.
    ProjectCacheUpdateLatency,
    /// Total time spent from starting to fetch a project config update to completing the fetch.
    ProjectCacheFetchDuration,
    /// Total time in milliseconds spent resolving queued project configuration update requests.
    ///
    /// Relay updates projects in batches. Every update cycle, Relay requests
    /// `limits.max_concurrent_queries * cache.batch_size` projects from the upstream. This metric
    /// measures the wall clock time for all concurrent requests in this loop.
    ///
    /// Note that after an update loop has completed, there may be more projects pending updates.
    /// This is indicated by `project_state.pending`.
    ProjectStateRequestDuration,
    /// Time in milliseconds required to decompress a project config from redis.
    ///
    /// Note that this also times the cases where project config is uncompressed,
    /// in which case the timer should be very close to zero.
    #[cfg(feature = "processing")]
    ProjectStateDecompression,
    /// Total duration in milliseconds for handling inbound web requests until the HTTP response is
    /// returned to the client.
    ///
    /// This does **not** correspond to the full event ingestion time. Requests for events that are
    /// not immediately rejected due to bad data or cached rate limits always return `200 OK`. Full
    /// validation and normalization occur asynchronously, which is reported by
    /// `event.processing_time`.
    ///
    /// This metric is tagged with:
    ///  - `method`: The HTTP method of the request.
    ///  - `route`: Unique dashed identifier of the endpoint.
    RequestsDuration,
    /// Time spent on minidump scrubbing.
    ///
    /// This is the total time spent on parsing and scrubbing the minidump.  Even if no PII
    /// scrubbing rules applied the minidump will still be parsed and the rules evaluated on
    /// the parsed minidump, this duration is reported here with status of "n/a".
    ///
    /// This metric is tagged with:
    ///
    /// - `status`: Scrubbing status: "ok" means successfully scrubbed, "error" means there
    ///   was an error during scrubbing and finally "n/a" means scrubbing was successful
    ///   but no scrubbing rules applied.
    MinidumpScrubbing,
    /// Time spent on view hierarchy scrubbing.
    ///
    /// This is the total time spent on parsing and scrubbing the view hierarchy JSON file.
    ///
    /// This metric is tagged with:
    ///
    /// - `status`: "ok" means successfully scrubbed, "error" means there was an error during
    ///   scrubbing
    ViewHierarchyScrubbing,
    /// Time spent on attachment scrubbing.
    ///
    /// This represents the total time spent on evaluating the scrubbing rules for an
    /// attachment and the attachment scrubbing itself, regardless of whether any rules were
    /// applied.  Note that minidumps which failed to be parsed (status="error" in
    /// scrubbing.minidumps.duration) will be scrubbed as plain attachments and count
    /// towards this.
    ///
    /// This metric is tagged with:
    ///
    ///   - `attachment_type`: The type of attachment, e.g. "minidump".
    AttachmentScrubbing,
    /// Total time spent to send request to upstream Relay and handle the response.
    ///
    /// This metric is tagged with:
    ///
    ///   - `result`: What happened to the request, an enumeration with the following values:
    ///     * `success`: The request was sent and returned a success code `HTTP 2xx`
    ///     * `response_error`: The request was sent and it returned an HTTP error.
    ///     * `payload_failed`: The request was sent but there was an error in interpreting the response.
    ///     * `send_failed`: Failed to send the request due to a network error.
    ///     * `rate_limited`: The request was rate limited.
    ///     * `invalid_json`: The response could not be parsed back into JSON.
    ///   - `route`: The endpoint that was called on the upstream.
    ///   - `status-code`: The status code of the request when available, otherwise "-".
    ///   - `retries`: Number of retries bucket 0, 1, 2, few (3 - 10), many (more than 10).
    UpstreamRequestsDuration,
    /// The delay between the timestamp stated in a payload and the receive time.
    ///
    /// SDKs cannot transmit payloads immediately in all cases. Sometimes, crashes require that
    /// events are sent after restarting the application. Similarly, SDKs buffer events during
    /// network downtimes for later transmission. This metric measures the delay between the time of
    /// the event and the time it arrives in Relay. The delay is measured after clock drift
    /// correction is applied.
    ///
    /// Only payloads with a delay of more than 1 minute are captured.
    ///
    /// This metric is tagged with:
    ///
    ///  - `category`: The data category of the payload. Can be one of: `event`, `transaction`,
    ///    `security`, or `session`.
    TimestampDelay,
    /// The time it takes the outcome aggregator to flush aggregated outcomes.
    OutcomeAggregatorFlushTime,
    /// Time in milliseconds spent on parsing, normalizing and scrubbing replay recordings.
    ReplayRecordingProcessing,
    /// Total time spent to send a request and receive the response from upstream.
    GlobalConfigRequestDuration,
    /// Timing in milliseconds for processing a message in the internal CPU pool.
    ///
    /// This metric is tagged with:
    ///
    ///  - `message`: The type of message that was processed.
    ProcessMessageDuration,
    /// Timing in milliseconds for processing a task in the project cache service.
    ///
    /// This metric is tagged with:
    /// - `task`: The type of the task the project cache does.
    ProjectCacheTaskDuration,
    /// Timing in milliseconds for handling and responding to a health check request.
    ///
    /// This metric is tagged with:
    ///  - `type`: The type of the health check, `liveness` or `readiness`.
    HealthCheckDuration,
    /// Temporary timing metric for how much time was spent evaluating span and transaction
    /// rate limits using the `RateLimitBuckets` message in the processor.
    ///
    /// This metric is tagged with:
    ///  - `category`: The data category evaluated.
    ///  - `limited`: Whether the batch is rate limited.
    ///  - `count`: How many items matching the data category are contained in the batch.
    #[cfg(feature = "processing")]
    RateLimitBucketsDuration,
    /// Timing in milliseconds for processing a task in the aggregator service.
    ///
    /// This metric is tagged with:
    ///  - `task`: The task being executed by the aggregator.
    ///  - `aggregator`: The name of the aggregator.
    AggregatorServiceDuration,
    /// Timing in milliseconds for processing a message in the metric router service.
    ///
    /// This metric is tagged with:
    ///  - `message`: The type of message that was processed.
    MetricRouterServiceDuration,
    /// Timing in milliseconds for processing a message in the metric store service.
    ///
    /// This metric is tagged with:
    ///  - `message`: The type of message that was processed.
    #[cfg(feature = "processing")]
    StoreServiceDuration,
    /// Timing in milliseconds for the time it takes to initialize the buffer.
    BufferInitialization,
    /// Timing in milliseconds for the time it takes for the buffer to pack & spool a batch.
    ///
    /// Contains the time it takes to pack multiple envelopes into a single memory blob.
    BufferSpool,
    /// Timing in milliseconds for the time it takes for the buffer to spool data to SQLite.
    BufferSqlWrite,
    /// Timing in milliseconds for the time it takes for the buffer to unspool data from disk.
    BufferUnspool,
    /// Timing in milliseconds for the time it takes for the buffer to push.
    BufferPush,
    /// Timing in milliseconds for the time it takes for the buffer to peek.
    BufferPeek,
    /// Timing in milliseconds for the time it takes for the buffer to pop.
    BufferPop,
    /// Timing in milliseconds for the time it takes for the buffer to drain its envelopes.
    BufferDrain,
    /// Timing in milliseconds for the time it takes for an envelope to be serialized.
    BufferEnvelopesSerialization,
    /// Timing in milliseconds for the time it takes for an envelope to be compressed.
    BufferEnvelopeCompression,
    /// Timing in milliseconds for the time it takes for an envelope to be decompressed.
    BufferEnvelopeDecompression,
    /// Timing in milliseconds for the time it takes to read an HTTP body.
    BodyReadDuration,
    /// Timing in milliseconds to count spans in a serialized transaction payload.
    CheckNestedSpans,
    /// The time needed to create a signature. Includes both the signature used for
    /// trusted relays and for register challenges.
    SignatureCreationDuration,
}
604
605impl TimerMetric for RelayTimers {
606    fn name(&self) -> &'static str {
607        match self {
608            RelayTimers::EventProcessingDeserialize => "event_processing.deserialize",
609            RelayTimers::EventProcessingNormalization => "event_processing.normalization",
610            RelayTimers::EventProcessingFiltering => "event_processing.filtering",
611            RelayTimers::EventProcessingRateLimiting => "event_processing.rate_limiting",
612            RelayTimers::EventProcessingPii => "event_processing.pii",
613            RelayTimers::EventProcessingSpanMetricsExtraction => {
614                "event_processing.span_metrics_extraction"
615            }
616            RelayTimers::EventProcessingSerialization => "event_processing.serialization",
617            RelayTimers::EnvelopeWaitTime => "event.wait_time",
618            RelayTimers::EnvelopeProcessingTime => "event.processing_time",
619            RelayTimers::EnvelopeTotalTime => "event.total_time",
620            RelayTimers::ProjectStateRequestDuration => "project_state.request.duration",
621            #[cfg(feature = "processing")]
622            RelayTimers::ProjectStateDecompression => "project_state.decompression",
623            RelayTimers::ProjectCacheUpdateLatency => "project_cache.latency",
624            RelayTimers::ProjectCacheFetchDuration => "project_cache.fetch.duration",
625            RelayTimers::RequestsDuration => "requests.duration",
626            RelayTimers::MinidumpScrubbing => "scrubbing.minidumps.duration",
627            RelayTimers::ViewHierarchyScrubbing => "scrubbing.view_hierarchy_scrubbing.duration",
628            RelayTimers::AttachmentScrubbing => "scrubbing.attachments.duration",
629            RelayTimers::UpstreamRequestsDuration => "upstream.requests.duration",
630            RelayTimers::TimestampDelay => "requests.timestamp_delay",
631            RelayTimers::OutcomeAggregatorFlushTime => "outcomes.aggregator.flush_time",
632            RelayTimers::ReplayRecordingProcessing => "replay.recording.process",
633            RelayTimers::GlobalConfigRequestDuration => "global_config.requests.duration",
634            RelayTimers::ProcessMessageDuration => "processor.message.duration",
635            RelayTimers::ProjectCacheTaskDuration => "project_cache.task.duration",
636            RelayTimers::HealthCheckDuration => "health.message.duration",
637            #[cfg(feature = "processing")]
638            RelayTimers::RateLimitBucketsDuration => "processor.rate_limit_buckets",
639            RelayTimers::AggregatorServiceDuration => "metrics.aggregator.message.duration",
640            RelayTimers::MetricRouterServiceDuration => "metrics.router.message.duration",
641            #[cfg(feature = "processing")]
642            RelayTimers::StoreServiceDuration => "store.message.duration",
643            RelayTimers::BufferInitialization => "buffer.initialization.duration",
644            RelayTimers::BufferSpool => "buffer.spool.duration",
645            RelayTimers::BufferSqlWrite => "buffer.write.duration",
646            RelayTimers::BufferUnspool => "buffer.unspool.duration",
647            RelayTimers::BufferPush => "buffer.push.duration",
648            RelayTimers::BufferPeek => "buffer.peek.duration",
649            RelayTimers::BufferPop => "buffer.pop.duration",
650            RelayTimers::BufferDrain => "buffer.drain.duration",
651            RelayTimers::BufferEnvelopesSerialization => "buffer.envelopes_serialization",
652            RelayTimers::BufferEnvelopeCompression => "buffer.envelopes_compression",
653            RelayTimers::BufferEnvelopeDecompression => "buffer.envelopes_decompression",
654            RelayTimers::BodyReadDuration => "requests.body_read.duration",
655            RelayTimers::CheckNestedSpans => "envelope.check_nested_spans",
656            RelayTimers::SignatureCreationDuration => "signature.create.duration",
657        }
658    }
659}
660
/// Counter metrics used by Relay.
pub enum RelayCounters {
    /// Tracks the number of tasks driven to completion by the async pool.
    ///
    /// This metric is tagged with:
    /// - `pool`: the name of the pool.
    AsyncPoolFinishedTasks,
    /// Number of events that had corrupted (unprintable) event attributes.
    ///
    /// This currently checks for `environment` and `release`, for which we know that
    /// some SDKs may send corrupted values.
    EventCorrupted,
    /// Number of envelopes accepted in the current time slot.
    ///
    /// This represents requests that have successfully passed rate limits and filters, and have
    /// been sent to the upstream.
    ///
    /// This metric is tagged with:
    ///  - `handling`: Either `"success"` if the envelope was handled correctly, or `"failure"` if
    ///    there was an error or bug.
    EnvelopeAccepted,
    /// Number of envelopes rejected in the current time slot.
    ///
    /// This includes envelopes being rejected because they are malformed or any other errors during
    /// processing (including filtered events, invalid payloads, and rate limits).
    ///
    /// To find the rejection reason, check `events.outcomes` instead.
    ///
    /// This metric is tagged with:
    ///  - `handling`: Either `"success"` if the envelope was handled correctly, or `"failure"` if
    ///    there was an error or bug.
    EnvelopeRejected,
    /// Number of total envelope items we received.
    ///
    /// Note: This does not count raw items, it counts the logical amount of items,
    /// e.g. a single item container counts all its contained items.
    ///
    /// This metric is tagged with:
    ///  - `item_type`: The type of the items being counted.
    ///  - `is_container`: Whether this item is a container holding multiple items.
    ///  - `sdk`: The name of the Sentry SDK sending the envelope. This tag is only set for
    ///    Sentry's SDKs and defaults to "proprietary".
    EnvelopeItems,
    /// Number of bytes we processed per envelope item.
    ///
    /// This metric is tagged with:
    ///  - `item_type`: The type of the items being counted.
    ///  - `is_container`: Whether this item is a container holding multiple items.
    ///  - `sdk`: The name of the Sentry SDK sending the envelope. This tag is only set for
    ///    Sentry's SDKs and defaults to "proprietary".
    EnvelopeItemBytes,
    /// Number of attempts to pop an envelope from the buffer.
    BufferTryPop,
    /// Number of envelopes spooled to disk.
    BufferSpooledEnvelopes,
    /// Number of envelopes unspooled from disk.
    BufferUnspooledEnvelopes,
    /// Number of project changed updates received by the buffer.
    BufferProjectChangedEvent,
    /// Number of times one or more projects of an envelope were pending when trying to pop
    /// their envelope.
    BufferProjectPending,
    /// Number of outcomes and reasons for rejected Envelopes.
    ///
    /// This metric is tagged with:
    ///  - `outcome`: The basic cause for rejecting the event.
    ///  - `reason`: A more detailed identifier describing the rule or mechanism leading to the
    ///    outcome.
    ///  - `to`: Describes the destination of the outcome. Can be either 'kafka' (when in
    ///    processing mode) or 'http' (when outcomes are enabled in an external relay).
    ///
    /// Possible outcomes are:
    ///  - `filtered`: Dropped by inbound data filters. The reason specifies the filter that
    ///    matched.
    ///  - `rate_limited`: Dropped by organization, project, or DSN rate limit, as well as exceeding
    ///    the Sentry plan quota. The reason contains the rate limit or quota that was exceeded.
    ///  - `invalid`: Data was considered invalid and could not be recovered. The reason indicates
    ///    the validation that failed.
    Outcomes,
    /// The number of individual outcomes including their quantity.
    ///
    /// While [`RelayCounters::Outcomes`] tracks the number of times aggregated outcomes
    /// have been emitted, this counter tracks the total quantity of individual outcomes.
    OutcomeQuantity,
    /// Number of project state HTTP requests.
    ///
    /// Relay updates projects in batches. Every update cycle, Relay requests
    /// `limits.max_concurrent_queries` batches of `cache.batch_size` projects from the upstream.
    /// The duration of these requests is reported via `project_state.request.duration`.
    ///
    /// Note that after an update loop has completed, there may be more projects pending updates.
    /// This is indicated by `project_state.pending`.
    ProjectStateRequest,
    /// Number of times a project state is requested from the central Redis cache.
    ///
    /// This metric is tagged with:
    ///  - `hit`: One of:
    ///     - `revision`: the cached version was validated to be up to date using its revision.
    ///     - `project_config`: the request was handled by the cache.
    ///     - `project_config_revision`: the request was handled by the cache and the revision did
    ///       not change.
    ///     - `false`: the request will be sent to the sentry endpoint.
    #[cfg(feature = "processing")]
    ProjectStateRedis,
    /// Number of times a project had a fetch scheduled.
    ProjectCacheSchedule,
    /// Number of times an upstream request for a project config is completed.
    ///
    /// Completion can be because a result was returned or because the config request was
    /// dropped after there still was no response after a timeout.
    ///
    /// This metric has tags for `result` and `attempts`, indicating whether it was successful or
    /// a timeout and how many attempts were made, respectively.
    ProjectUpstreamCompleted,
    /// Number of times an upstream request for a project config failed.
    ///
    /// Failure can happen, for example, when there's a network error. Refer to
    /// [`UpstreamRequestError`](crate::services::upstream::UpstreamRequestError) for all cases.
    ProjectUpstreamFailed,
    /// Number of Relay server starts.
    ///
    /// This can be used to track unwanted restarts due to crashes or termination.
    ServerStarting,
    /// Number of messages placed on the Kafka queues.
    ///
    /// When Relay operates as Sentry service and an Envelope item is successfully processed, each
    /// Envelope item results in a dedicated message on one of the ingestion topics on Kafka.
    ///
    /// This metric is tagged with:
    ///  - `event_type`: The kind of message produced to Kafka.
    ///  - `namespace` (only for metrics): The namespace that the metric belongs to.
    ///  - `is_segment` (only for event_type span): `true` if the span is the root of a segment.
    ///  - `has_parent` (only for event_type span): `false` if the span is the root of a trace.
    ///  - `platform` (only for event_type span): The platform from which the span was sent.
    ///  - `metric_type` (only for event_type metric): The metric type, counter, distribution,
    ///    gauge or set.
    ///  - `metric_encoding` (only for event_type metric): The encoding used for distribution and
    ///    set metrics.
    ///
    /// The message types can be:
    ///
    ///  - `event`: An error or transaction event. Error events are sent to `ingest-events`,
    ///    transactions to `ingest-transactions`, and errors with attachments are sent to
    ///    `ingest-attachments`.
    ///  - `attachment`: An attachment file associated with an error event, sent to
    ///    `ingest-attachments`.
    ///  - `user_report`: A message from the user feedback dialog, sent to `ingest-events`.
    ///  - `session`: A release health session update, sent to `ingest-sessions`.
    #[cfg(feature = "processing")]
    ProcessingMessageProduced,
    /// Number of spans produced in the new format.
    #[cfg(feature = "processing")]
    SpanV2Produced,
    /// Number of events that hit any of the store-like endpoints: Envelope, Store, Security,
    /// Minidump, Unreal.
    ///
    /// The events are counted before they are rate limited, filtered, or processed in any way.
    ///
    /// This metric is tagged with:
    ///  - `version`: The event protocol version number defaulting to `7`.
    EventProtocol,
    /// The number of transaction events processed by the source of the transaction name.
    ///
    /// This metric is tagged with:
    ///  - `platform`: The event's platform, such as `"javascript"`.
    ///  - `source`: The source of the transaction name on the client. See the [transaction source
    ///    documentation](https://develop.sentry.dev/sdk/event-payloads/properties/transaction_info/)
    ///    for all valid values.
    ///  - `contains_slashes`: Whether the transaction name contains `/`. We use this as a heuristic
    ///    to represent URL transactions.
    EventTransaction,
    /// The number of transaction events processed grouped by transaction name modifications.
    ///
    /// This metric is tagged with:
    ///  - `source_in`: The source of the transaction name before normalization.
    ///    See the [transaction source
    ///    documentation](https://develop.sentry.dev/sdk/event-payloads/properties/transaction_info/)
    ///    for all valid values.
    ///  - `change`: The mechanism that changed the transaction name.
    ///    Either `"none"`, `"pattern"`, `"rule"`, or `"both"`.
    ///  - `source_out`: The source of the transaction name after normalization.
    TransactionNameChanges,
    /// Number of HTTP requests reaching Relay.
    Requests,
    /// Number of completed HTTP requests.
    ///
    /// This metric is tagged with:
    ///
    ///  - `status_code`: The HTTP status code number.
    ///  - `method`: The HTTP method used in the request in uppercase.
    ///  - `route`: Unique dashed identifier of the endpoint.
    ResponsesStatusCodes,
    /// Number of evicted stale projects from the cache.
    ///
    /// Relay scans the in-memory project cache for stale entries in a regular interval configured
    /// by `cache.eviction_interval`.
    ///
    /// The cache duration for project states can be configured with the following options:
    ///
    ///  - `cache.project_expiry`: The time after which a project state counts as expired. It is
    ///    automatically refreshed if a request references the project after it has expired.
    ///  - `cache.project_grace_period`: The time after expiry at which the project state will still
    ///    be used to ingest events. Once the grace period expires, the cache is evicted and new
    ///    requests wait for an update.
    EvictingStaleProjectCaches,
    /// Number of refreshes for stale projects in the cache.
    RefreshStaleProjectCaches,
    /// Number of times that parsing a metrics bucket item from an envelope failed.
    MetricBucketsParsingFailed,
    /// Counts extractions of transaction names.
    ///
    /// Tagged with the decision to drop / replace / use original.
    MetricsTransactionNameExtracted,
    /// Number of events with an OpenTelemetry context.
    ///
    /// This metric is tagged with:
    ///  - `platform`: The event's platform, such as `"javascript"`.
    ///  - `sdk`: The name of the Sentry SDK sending the transaction. This tag is only set for
    ///    Sentry's SDKs and defaults to "proprietary".
    OpenTelemetryEvent,
    /// Number of global config fetches from upstream.
    ///
    /// Only 2XX responses are considered; send errors (e.g. auth or network errors) are ignored.
    ///
    /// This metric is tagged with:
    ///  - `success`: whether deserializing the global config succeeded.
    GlobalConfigFetched,
    /// The number of attachments processed in the same envelope as a user_report_v2 event.
    FeedbackAttachments,
    /// All COGS tracked values.
    ///
    /// This metric is tagged with:
    /// - `resource_id`: The COGS resource id.
    /// - `app_feature`: The COGS app feature.
    CogsUsage,
    /// The number of times metrics of a project have been flushed without the project being
    /// fetched/available.
    ProjectStateFlushMetricsNoProject,
    /// Incremented every time a bucket is dropped.
    ///
    /// This should only happen when a project state is invalid during graceful shutdown.
    ///
    /// This metric is tagged with:
    ///  - `aggregator`: The name of the metrics aggregator (usually `"default"`).
    BucketsDropped,
    /// Incremented every time a segment exceeds the expected limit.
    ReplayExceededSegmentLimit,
    /// Incremented every time the server accepts a new connection.
    ServerSocketAccept,
    /// Incremented every time the server aborts a connection because of an idle timeout.
    ServerConnectionIdleTimeout,
    /// The total delay of metric buckets in seconds.
    ///
    /// The delay is measured from initial creation of the bucket in an internal Relay
    /// until it is produced to Kafka.
    ///
    /// Use [`Self::MetricDelayCount`] to calculate the average delay.
    ///
    /// This metric is tagged with:
    /// - `namespace`: the metric namespace.
    #[cfg(feature = "processing")]
    MetricDelaySum,
    /// The number of buckets counted for the [`Self::MetricDelaySum`] metric.
    ///
    /// This metric is tagged with:
    /// - `namespace`: the metric namespace.
    #[cfg(feature = "processing")]
    MetricDelayCount,
    /// The number of times PlayStation processing was attempted.
    #[cfg(all(sentry, feature = "processing"))]
    PlaystationProcessing,
    /// The number of times a sampling decision was made.
    ///
    /// This metric is tagged with:
    /// - `item`: what item the decision is taken for (transaction vs span).
    SamplingDecision,
    /// The number of times an upload of an attachment occurs.
    ///
    /// This metric is tagged with:
    /// - `result`: `success` or the failure reason.
    /// - `type`: `envelope` or `attachment_v2`
    #[cfg(feature = "processing")]
    AttachmentUpload,
    /// Whether a logs envelope has a trace context header or not.
    ///
    /// This metric is tagged with:
    /// - `dsc`: yes or no
    EnvelopeWithLogs,
}
945
946impl CounterMetric for RelayCounters {
947    fn name(&self) -> &'static str {
948        match self {
949            RelayCounters::AsyncPoolFinishedTasks => "async_pool.finished_tasks",
950            RelayCounters::EventCorrupted => "event.corrupted",
951            RelayCounters::EnvelopeAccepted => "event.accepted",
952            RelayCounters::EnvelopeRejected => "event.rejected",
953            RelayCounters::EnvelopeItems => "event.items",
954            RelayCounters::EnvelopeItemBytes => "event.item_bytes",
955            RelayCounters::BufferTryPop => "buffer.try_pop",
956            RelayCounters::BufferSpooledEnvelopes => "buffer.spooled_envelopes",
957            RelayCounters::BufferUnspooledEnvelopes => "buffer.unspooled_envelopes",
958            RelayCounters::BufferProjectChangedEvent => "buffer.project_changed_event",
959            RelayCounters::BufferProjectPending => "buffer.project_pending",
960            RelayCounters::Outcomes => "events.outcomes",
961            RelayCounters::OutcomeQuantity => "events.outcome_quantity",
962            RelayCounters::ProjectStateRequest => "project_state.request",
963            #[cfg(feature = "processing")]
964            RelayCounters::ProjectStateRedis => "project_state.redis.requests",
965            RelayCounters::ProjectUpstreamCompleted => "project_upstream.completed",
966            RelayCounters::ProjectUpstreamFailed => "project_upstream.failed",
967            RelayCounters::ProjectCacheSchedule => "project_cache.schedule",
968            RelayCounters::ServerStarting => "server.starting",
969            #[cfg(feature = "processing")]
970            RelayCounters::ProcessingMessageProduced => "processing.event.produced",
971            #[cfg(feature = "processing")]
972            RelayCounters::SpanV2Produced => "store.produced.span_v2",
973            RelayCounters::EventProtocol => "event.protocol",
974            RelayCounters::EventTransaction => "event.transaction",
975            RelayCounters::TransactionNameChanges => "event.transaction_name_changes",
976            RelayCounters::Requests => "requests",
977            RelayCounters::ResponsesStatusCodes => "responses.status_codes",
978            RelayCounters::EvictingStaleProjectCaches => "project_cache.eviction",
979            RelayCounters::RefreshStaleProjectCaches => "project_cache.refresh",
980            RelayCounters::MetricBucketsParsingFailed => "metrics.buckets.parsing_failed",
981            RelayCounters::MetricsTransactionNameExtracted => "metrics.transaction_name",
982            RelayCounters::OpenTelemetryEvent => "event.opentelemetry",
983            RelayCounters::GlobalConfigFetched => "global_config.fetch",
984            RelayCounters::FeedbackAttachments => "processing.feedback_attachments",
985            RelayCounters::CogsUsage => "cogs.usage",
986            RelayCounters::ProjectStateFlushMetricsNoProject => "project_state.metrics.no_project",
987            RelayCounters::BucketsDropped => "metrics.buckets.dropped",
988            RelayCounters::ReplayExceededSegmentLimit => "replay.segment_limit_exceeded",
989            RelayCounters::ServerSocketAccept => "server.http.accepted",
990            RelayCounters::ServerConnectionIdleTimeout => "server.http.idle_timeout",
991            #[cfg(feature = "processing")]
992            RelayCounters::MetricDelaySum => "metrics.delay.sum",
993            #[cfg(feature = "processing")]
994            RelayCounters::MetricDelayCount => "metrics.delay.count",
995            #[cfg(all(sentry, feature = "processing"))]
996            RelayCounters::PlaystationProcessing => "processing.playstation",
997            RelayCounters::SamplingDecision => "sampling.decision",
998            #[cfg(feature = "processing")]
999            RelayCounters::AttachmentUpload => "attachment.upload",
1000            RelayCounters::EnvelopeWithLogs => "logs.envelope",
1001        }
1002    }
1003}