relay_server/
statsd.rs

1use relay_statsd::{CounterMetric, DistributionMetric, GaugeMetric, TimerMetric};
2#[cfg(doc)]
3use relay_system::RuntimeMetrics;
4
/// Gauge metrics used by Relay.
pub enum RelayGauges {
    /// Tracks the number of futures waiting to be executed in the pool's queue.
    ///
    /// Useful for understanding the backlog of work and identifying potential bottlenecks.
    ///
    /// This metric is tagged with:
    /// - `pool`: the name of the pool.
    AsyncPoolQueueSize,
    /// Tracks the utilization of the async pool.
    ///
    /// The utilization is a value between 0.0 and 100.0 which determines how busy the pool is doing
    /// CPU-bound work.
    ///
    /// This metric is tagged with:
    /// - `pool`: the name of the pool.
    AsyncPoolUtilization,
    /// Tracks the activity of the async pool.
    ///
    /// The activity is a value between 0.0 and 100.0 which determines how busy the pool is
    /// with respect to its provisioned capacity.
    ///
    /// This metric is tagged with:
    /// - `pool`: the name of the pool.
    AsyncPoolActivity,
    /// The state of Relay with respect to the upstream connection.
    ///
    /// Possible values are `0` for normal operations and `1` for a network outage.
    NetworkOutage,
    /// Number of elements in the envelope buffer across all the stacks.
    ///
    /// This metric is tagged with:
    /// - `storage_type`: The type of storage used in the envelope buffer.
    BufferEnvelopesCount,
    /// The number of individual stacks in the priority queue.
    ///
    /// Per combination of `(own_key, sampling_key)`, a new stack is created.
    BufferStackCount,
    /// The amount of disk used by the buffer.
    BufferDiskUsed,
    /// The currently used memory by the entire system.
    ///
    /// Relay uses the same value for its memory health check.
    SystemMemoryUsed,
    /// The total system memory.
    ///
    /// Relay uses the same value for its memory health check.
    SystemMemoryTotal,
    /// The number of connections currently being managed by the Redis Pool.
    #[cfg(feature = "processing")]
    RedisPoolConnections,
    /// The number of idle connections in the Redis Pool.
    #[cfg(feature = "processing")]
    RedisPoolIdleConnections,
    /// The maximum number of connections in the Redis pool.
    #[cfg(feature = "processing")]
    RedisPoolMaxConnections,
    /// The number of futures waiting to grab a connection.
    #[cfg(feature = "processing")]
    RedisPoolWaitingForConnection,
    /// The number of notifications in the broadcast channel of the project cache.
    ProjectCacheNotificationChannel,
    /// The number of scheduled and in progress fetches in the project cache.
    ProjectCacheScheduledFetches,
    /// Exposes the amount of currently open and handled connections by the server.
    ServerActiveConnections,
    /// Maximum delay of a metric bucket in seconds.
    ///
    /// The maximum is measured from initial creation of the bucket in an internal Relay
    /// until it is produced to Kafka.
    ///
    /// This metric is tagged with:
    /// - `namespace`: the metric namespace.
    #[cfg(feature = "processing")]
    MetricDelayMax,
    /// Estimated percentage [0-100] of how busy Relay's internal services are.
    ///
    /// This metric is tagged with:
    /// - `service`: the service name.
    /// - `instance_id`: an identifier for the running service that is unique for the service name.
    ServiceUtilization,
    /// Number of attachment uploads currently in flight.
    #[cfg(feature = "processing")]
    ConcurrentAttachmentUploads,
}
89
90impl GaugeMetric for RelayGauges {
91    fn name(&self) -> &'static str {
92        match self {
93            Self::AsyncPoolQueueSize => "async_pool.queue_size",
94            Self::AsyncPoolUtilization => "async_pool.utilization",
95            Self::AsyncPoolActivity => "async_pool.activity",
96            Self::NetworkOutage => "upstream.network_outage",
97            Self::BufferEnvelopesCount => "buffer.envelopes_count",
98            Self::BufferStackCount => "buffer.stack_count",
99            Self::BufferDiskUsed => "buffer.disk_used",
100            Self::SystemMemoryUsed => "health.system_memory.used",
101            Self::SystemMemoryTotal => "health.system_memory.total",
102            #[cfg(feature = "processing")]
103            Self::RedisPoolConnections => "redis.pool.connections",
104            #[cfg(feature = "processing")]
105            Self::RedisPoolIdleConnections => "redis.pool.idle_connections",
106            #[cfg(feature = "processing")]
107            Self::RedisPoolMaxConnections => "redis.pool.max_connections",
108            #[cfg(feature = "processing")]
109            Self::RedisPoolWaitingForConnection => "redis.pool.waiting_for_connection",
110            Self::ProjectCacheNotificationChannel => "project_cache.notification_channel.size",
111            Self::ProjectCacheScheduledFetches => "project_cache.fetches.size",
112            Self::ServerActiveConnections => "server.http.connections",
113            #[cfg(feature = "processing")]
114            Self::MetricDelayMax => "metrics.delay.max",
115            Self::ServiceUtilization => "service.utilization",
116            #[cfg(feature = "processing")]
117            Self::ConcurrentAttachmentUploads => "attachment.upload.concurrent",
118        }
119    }
120}
121
/// Gauge metrics collected from the Runtime.
pub enum RuntimeGauges {
    /// Exposes [`RuntimeMetrics::num_idle_threads`].
    NumIdleThreads,
    /// Exposes [`RuntimeMetrics::num_alive_tasks`].
    NumAliveTasks,
    /// Exposes [`RuntimeMetrics::blocking_queue_depth`].
    BlockingQueueDepth,
    /// Exposes [`RuntimeMetrics::num_blocking_threads`].
    NumBlockingThreads,
    /// Exposes [`RuntimeMetrics::num_idle_blocking_threads`].
    NumIdleBlockingThreads,
    /// Exposes [`RuntimeMetrics::num_workers`].
    NumWorkers,
    /// Exposes [`RuntimeMetrics::worker_local_queue_depth`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerLocalQueueDepth,
    /// Exposes [`RuntimeMetrics::worker_mean_poll_time`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerMeanPollTime,
}
147
148impl GaugeMetric for RuntimeGauges {
149    fn name(&self) -> &'static str {
150        match self {
151            RuntimeGauges::NumIdleThreads => "runtime.idle_threads",
152            RuntimeGauges::NumAliveTasks => "runtime.alive_tasks",
153            RuntimeGauges::BlockingQueueDepth => "runtime.blocking_queue_depth",
154            RuntimeGauges::NumBlockingThreads => "runtime.num_blocking_threads",
155            RuntimeGauges::NumIdleBlockingThreads => "runtime.num_idle_blocking_threads",
156            RuntimeGauges::NumWorkers => "runtime.num_workers",
157            RuntimeGauges::WorkerLocalQueueDepth => "runtime.worker_local_queue_depth",
158            RuntimeGauges::WorkerMeanPollTime => "runtime.worker_mean_poll_time",
159        }
160    }
161}
162
/// Counter metrics collected from the Runtime.
pub enum RuntimeCounters {
    /// Exposes [`RuntimeMetrics::budget_forced_yield_count`].
    BudgetForcedYieldCount,
    /// Exposes [`RuntimeMetrics::worker_local_schedule_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerLocalScheduleCount,
    /// Exposes [`RuntimeMetrics::worker_noop_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerNoopCount,
    /// Exposes [`RuntimeMetrics::worker_overflow_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerOverflowCount,
    /// Exposes [`RuntimeMetrics::worker_park_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerParkCount,
    /// Exposes [`RuntimeMetrics::worker_poll_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerPollCount,
    /// Exposes [`RuntimeMetrics::worker_steal_count`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerStealCount,
    /// Exposes [`RuntimeMetrics::worker_steal_operations`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerStealOperations,
    /// Exposes [`RuntimeMetrics::worker_total_busy_duration`].
    ///
    /// This metric is tagged with:
    /// - `worker`: the worker id.
    WorkerTotalBusyDuration,
}
208
209impl CounterMetric for RuntimeCounters {
210    fn name(&self) -> &'static str {
211        match self {
212            RuntimeCounters::BudgetForcedYieldCount => "runtime.budget_forced_yield_count",
213            RuntimeCounters::WorkerLocalScheduleCount => "runtime.worker_local_schedule_count",
214            RuntimeCounters::WorkerNoopCount => "runtime.worker_noop_count",
215            RuntimeCounters::WorkerOverflowCount => "runtime.worker_overflow_count",
216            RuntimeCounters::WorkerParkCount => "runtime.worker_park_count",
217            RuntimeCounters::WorkerPollCount => "runtime.worker_poll_count",
218            RuntimeCounters::WorkerStealCount => "runtime.worker_steal_count",
219            RuntimeCounters::WorkerStealOperations => "runtime.worker_steal_operations",
220            RuntimeCounters::WorkerTotalBusyDuration => "runtime.worker_total_busy_duration",
221        }
222    }
223}
224
/// Histogram metrics used by Relay.
pub enum RelayDistributions {
    /// The number of bytes received by Relay for each individual envelope item type.
    ///
    /// This metric is tagged with:
    ///  - `item_type`: The type of the items being counted.
    ///  - `is_container`: Whether this item is a container holding multiple items.
    EnvelopeItemSize,
    /// The amount of bytes in the item payloads of an envelope pushed to the envelope buffer.
    ///
    /// This is not quite the same as the actual size of a serialized envelope, because it ignores
    /// the envelope header and item headers.
    BufferEnvelopeBodySize,
    /// Size of a serialized envelope pushed to the envelope buffer.
    BufferEnvelopeSize,
    /// Size of a compressed envelope pushed to the envelope buffer.
    BufferEnvelopeSizeCompressed,
    /// The number of batches emitted per partition.
    BatchesPerPartition,
    /// The number of buckets in a batch emitted.
    ///
    /// This corresponds to the number of buckets that will end up in an envelope.
    BucketsPerBatch,
    /// The number of spans per processed transaction event.
    ///
    /// This metric is tagged with:
    ///  - `platform`: The event's platform, such as `"javascript"`.
    ///  - `sdk`: The name of the Sentry SDK sending the transaction. This tag is only set for
    ///    Sentry's SDKs and defaults to "proprietary".
    EventSpans,
    /// Number of projects in the in-memory project cache that are waiting for their state to be
    /// updated.
    ///
    /// See `project_cache.size` for more description of the project cache.
    ProjectStatePending,
    /// Number of project states **requested** from the upstream for each batch request.
    ///
    /// If multiple batches are updated concurrently, this metric is reported multiple times.
    ///
    /// The batch size can be configured with `cache.batch_size`. See `project_cache.size` for more
    /// description of the project cache.
    ProjectStateRequestBatchSize,
    /// Number of project states **returned** from the upstream for each batch request.
    ///
    /// If multiple batches are updated concurrently, this metric is reported multiple times.
    ///
    /// See `project_cache.size` for more description of the project cache.
    ProjectStateReceived,
    /// Number of attempts required to fetch the config for a given project key.
    ProjectStateAttempts,
    /// Number of project states currently held in the in-memory project cache.
    ///
    /// The cache duration for project states can be configured with the following options:
    ///
    ///  - `cache.project_expiry`: The time after which a project state counts as expired. It is
    ///    automatically refreshed if a request references the project after it has expired.
    ///  - `cache.project_grace_period`: The time after expiry at which the project state will still
    ///    be used to ingest events. Once the grace period expires, the cache is evicted and new
    ///    requests wait for an update.
    ///
    /// There is no limit to the number of cached projects.
    ProjectStateCacheSize,
    /// The size of the compressed project config in the redis cache, in bytes.
    #[cfg(feature = "processing")]
    ProjectStateSizeBytesCompressed,
    /// The size of the uncompressed project config in the redis cache, in bytes.
    #[cfg(feature = "processing")]
    ProjectStateSizeBytesDecompressed,
    /// The number of upstream requests queued up for sending.
    ///
    /// Relay employs connection keep-alive whenever possible. Connections are kept open for _15_
    /// seconds of inactivity or _75_ seconds of activity. If all connections are busy, they are
    /// queued, which is reflected in this metric.
    ///
    /// This metric is tagged with:
    ///  - `priority`: The queueing priority of the request, either `"high"` or `"low"`. The
    ///    priority determines precedence in executing requests.
    ///
    /// The number of concurrent connections can be configured with:
    ///  - `limits.max_concurrent_requests` for the overall number of connections
    ///  - `limits.max_concurrent_queries` for the number of concurrent high-priority requests
    UpstreamMessageQueueSize,
    /// Counts the number of retries for each upstream http request.
    ///
    /// This metric is tagged with:
    ///
    ///   - `result`: What happened to the request, an enumeration with the following values:
    ///     * `success`: The request was sent and returned a success code `HTTP 2xx`
    ///     * `response_error`: The request was sent and it returned an HTTP error.
    ///     * `payload_failed`: The request was sent but there was an error in interpreting the response.
    ///     * `send_failed`: Failed to send the request due to a network error.
    ///     * `rate_limited`: The request was rate limited.
    ///     * `invalid_json`: The response could not be parsed back into JSON.
    ///   - `route`: The endpoint that was called on the upstream.
    ///   - `status-code`: The status code of the request when available, otherwise "-".
    UpstreamRetries,
    /// Size of queries (projectconfig queries, i.e. the request payload, not the response) sent by
    /// Relay over HTTP in bytes.
    UpstreamQueryBodySize,
    /// Size of envelopes sent over HTTP in bytes.
    UpstreamEnvelopeBodySize,
    /// Size of batched global metrics requests sent by Relay over HTTP in bytes.
    UpstreamMetricsBodySize,
    /// Distribution of flush buckets over partition keys.
    ///
    /// The distribution of buckets should be even.
    /// If it is not, this metric should expose it.
    PartitionKeys,
    /// Measures how many splits were performed when sending out a partition.
    PartitionSplits,
    /// Canonical size of a Trace Item.
    ///
    /// This is not the size in bytes, this is using the same algorithm we're using for the logs
    /// billing category.
    ///
    /// This metric is tagged with:
    ///  - `item`: the trace item type.
    ///  - `too_large`: `true` or `false`, whether the item is bigger than the allowed size limit.
    TraceItemCanonicalSize,
}
345
346impl DistributionMetric for RelayDistributions {
347    fn name(&self) -> &'static str {
348        match self {
349            Self::EnvelopeItemSize => "event.item_size",
350            Self::EventSpans => "event.spans",
351            Self::BatchesPerPartition => "metrics.buckets.batches_per_partition",
352            Self::BucketsPerBatch => "metrics.buckets.per_batch",
353            Self::BufferEnvelopeBodySize => "buffer.envelope_body_size",
354            Self::BufferEnvelopeSize => "buffer.envelope_size",
355            Self::BufferEnvelopeSizeCompressed => "buffer.envelope_size.compressed",
356            Self::ProjectStatePending => "project_state.pending",
357            Self::ProjectStateAttempts => "project_state.attempts",
358            Self::ProjectStateRequestBatchSize => "project_state.request.batch_size",
359            Self::ProjectStateReceived => "project_state.received",
360            Self::ProjectStateCacheSize => "project_cache.size",
361            #[cfg(feature = "processing")]
362            Self::ProjectStateSizeBytesCompressed => "project_state.size_bytes.compressed",
363            #[cfg(feature = "processing")]
364            Self::ProjectStateSizeBytesDecompressed => "project_state.size_bytes.decompressed",
365            Self::UpstreamMessageQueueSize => "http_queue.size",
366            Self::UpstreamRetries => "upstream.retries",
367            Self::UpstreamQueryBodySize => "upstream.query.body_size",
368            Self::UpstreamEnvelopeBodySize => "upstream.envelope.body_size",
369            Self::UpstreamMetricsBodySize => "upstream.metrics.body_size",
370            Self::PartitionKeys => "metrics.buckets.partition_keys",
371            Self::PartitionSplits => "partition_splits",
372            Self::TraceItemCanonicalSize => "trace_item.canonical_size",
373        }
374    }
375}
376
/// Timer metrics used by Relay.
pub enum RelayTimers {
    /// Time in milliseconds spent deserializing an event from JSON bytes into the native data
    /// structure on which Relay operates.
    EventProcessingDeserialize,
    /// Time in milliseconds spent running normalization on an event. Normalization
    /// happens before envelope filtering and metric extraction.
    EventProcessingNormalization,
    /// Time in milliseconds spent running inbound data filters on an event.
    EventProcessingFiltering,
    /// Time in milliseconds spent checking for organization, project, and DSN rate limits.
    ///
    /// Not all events reach this point. After an event is rate limited for the first time, the rate
    /// limit is cached. Events coming in after this will be discarded earlier in the request queue
    /// and do not reach the processing queue.
    ///
    /// This metric is tagged with:
    ///  - `type`: The type of limiter executed, `cached` or `consistent`.
    ///  - `unit`: The item/unit of work which is being rate limited, only available for new
    ///    processing pipelines.
    EventProcessingRateLimiting,
    /// Time in milliseconds spent in data scrubbing for the current event. Data scrubbing happens
    /// last before serializing the event back to JSON.
    EventProcessingPii,
    /// Time spent converting the event from its in-memory representation into a JSON string.
    EventProcessingSerialization,
    /// Time used to extract span metrics from an event.
    EventProcessingSpanMetricsExtraction,
    /// Time spent between the start of request handling and processing of the envelope.
    ///
    /// This includes streaming the request body, scheduling overheads, project config fetching,
    /// batched requests and congestion in the internal processor. This does not include delays in
    /// the incoming request (body upload) and skips all envelopes that are fast-rejected.
    EnvelopeWaitTime,
    /// Time in milliseconds spent in synchronous processing of envelopes.
    ///
    /// This timing covers the end-to-end processing in the CPU pool and comprises:
    ///
    ///  - `event_processing.deserialize`
    ///  - `event_processing.pii`
    ///  - `event_processing.serialization`
    ///
    /// With Relay in processing mode, this also includes the following timings:
    ///
    ///  - `event_processing.process`
    ///  - `event_processing.filtering`
    ///  - `event_processing.rate_limiting`
    EnvelopeProcessingTime,
    /// Total time in milliseconds an envelope spends in Relay from the time it is received until it
    /// finishes processing and has been submitted to the upstream.
    EnvelopeTotalTime,
    /// Latency of project config updates until they reach Relay.
    ///
    /// The metric is calculated by using the creation timestamp of the project config
    /// and when Relay updates its local cache with the new project config.
    ///
    /// No metric is emitted when Relay fetches a project config for the first time.
    ///
    /// This metric is tagged with:
    ///  - `delay`: Bucketed amount of seconds passed between fetches.
    ProjectCacheUpdateLatency,
    /// Total time spent from starting to fetch a project config update to completing the fetch.
    ProjectCacheFetchDuration,
    /// Total time in milliseconds spent fetching queued project configuration update requests to
    /// resolve.
    ///
    /// Relay updates projects in batches. Every update cycle, Relay requests
    /// `limits.max_concurrent_queries * cache.batch_size` projects from the upstream. This metric
    /// measures the wall clock time for all concurrent requests in this loop.
    ///
    /// Note that after an update loop has completed, there may be more projects pending updates.
    /// This is indicated by `project_state.pending`.
    ProjectStateRequestDuration,
    /// Time in milliseconds required to decompress a project config from redis.
    ///
    /// Note that this also times the cases where project config is uncompressed,
    /// in which case the timer should be very close to zero.
    #[cfg(feature = "processing")]
    ProjectStateDecompression,
    /// Total duration in milliseconds for handling inbound web requests until the HTTP response is
    /// returned to the client.
    ///
    /// This does **not** correspond to the full event ingestion time. Requests for events that are
    /// not immediately rejected due to bad data or cached rate limits always return `200 OK`. Full
    /// validation and normalization occur asynchronously, which is reported by
    /// `event.processing_time`.
    ///
    /// This metric is tagged with:
    ///  - `method`: The HTTP method of the request.
    ///  - `route`: Unique dashed identifier of the endpoint.
    RequestsDuration,
    /// Time spent on minidump scrubbing.
    ///
    /// This is the total time spent on parsing and scrubbing the minidump.  Even if no PII
    /// scrubbing rules applied the minidump will still be parsed and the rules evaluated on
    /// the parsed minidump, this duration is reported here with status of "n/a".
    ///
    /// This metric is tagged with:
    ///
    /// - `status`: Scrubbing status: "ok" means successfully scrubbed, "error" means there
    ///   was an error during scrubbing and finally "n/a" means scrubbing was successful
    ///   but no scrubbing rules applied.
    MinidumpScrubbing,
    /// Time spent on view hierarchy scrubbing.
    ///
    /// This is the total time spent on parsing and scrubbing the view hierarchy json file.
    ///
    /// This metric is tagged with:
    ///
    /// - `status`: "ok" means successfully scrubbed, "error" means there was an error during
    ///   scrubbing
    ViewHierarchyScrubbing,
    /// Time spent on attachment scrubbing.
    ///
    /// This represents the total time spent on evaluating the scrubbing rules for an
    /// attachment and the attachment scrubbing itself, regardless of whether any rules were
    /// applied.  Note that minidumps which failed to be parsed (status="error" in
    /// scrubbing.minidumps.duration) will be scrubbed as plain attachments and count
    /// towards this.
    ///
    /// This metric is tagged with:
    ///
    ///   - `attachment_type`: The type of attachment, e.g. "minidump".
    AttachmentScrubbing,
    /// Total time spent to send request to upstream Relay and handle the response.
    ///
    /// This metric is tagged with:
    ///
    ///   - `result`: What happened to the request, an enumeration with the following values:
    ///     * `success`: The request was sent and returned a success code `HTTP 2xx`
    ///     * `response_error`: The request was sent and it returned an HTTP error.
    ///     * `payload_failed`: The request was sent but there was an error in interpreting the response.
    ///     * `send_failed`: Failed to send the request due to a network error.
    ///     * `rate_limited`: The request was rate limited.
    ///     * `invalid_json`: The response could not be parsed back into JSON.
    ///   - `route`: The endpoint that was called on the upstream.
    ///   - `status-code`: The status code of the request when available, otherwise "-".
    ///   - `retries`: Number of retries bucket 0, 1, 2, few (3 - 10), many (more than 10).
    UpstreamRequestsDuration,
    /// The delay between the timestamp stated in a payload and the receive time.
    ///
    /// SDKs cannot transmit payloads immediately in all cases. Sometimes, crashes require that
    /// events are sent after restarting the application. Similarly, SDKs buffer events during
    /// network downtimes for later transmission. This metric measures the delay between the time of
    /// the event and the time it arrives in Relay. The delay is measured after clock drift
    /// correction is applied.
    ///
    /// Only payloads with a delay of more than 1 minute are captured.
    ///
    /// This metric is tagged with:
    ///
    ///  - `category`: The data category of the payload. Can be one of: `event`, `transaction`,
    ///    `security`, or `session`.
    TimestampDelay,
    /// The time it takes the outcome aggregator to flush aggregated outcomes.
    OutcomeAggregatorFlushTime,
    /// Time in milliseconds spent on parsing, normalizing and scrubbing replay recordings.
    ReplayRecordingProcessing,
    /// Total time spent to send a request and receive the response from upstream.
    GlobalConfigRequestDuration,
    /// Timing in milliseconds for processing a message in the internal CPU pool.
    ///
    /// This metric is tagged with:
    ///
    ///  - `message`: The type of message that was processed.
    ProcessMessageDuration,
    /// Timing in milliseconds for processing a task in the project cache service.
    ///
    /// This metric is tagged with:
    /// - `task`: The type of the task the project cache does.
    ProjectCacheTaskDuration,
    /// Timing in milliseconds for handling and responding to a health check request.
    ///
    /// This metric is tagged with:
    ///  - `type`: The type of the health check, `liveness` or `readiness`.
    HealthCheckDuration,
    /// Temporary timing metric for how much time was spent evaluating span and transaction
    /// rate limits using the `RateLimitBuckets` message in the processor.
    ///
    /// This metric is tagged with:
    ///  - `category`: The data category evaluated.
    ///  - `limited`: Whether the batch is rate limited.
    ///  - `count`: How many items matching the data category are contained in the batch.
    #[cfg(feature = "processing")]
    RateLimitBucketsDuration,
    /// Timing in milliseconds for processing a task in the aggregator service.
    ///
    /// This metric is tagged with:
    ///  - `task`: The task being executed by the aggregator.
    ///  - `aggregator`: The name of the aggregator.
    AggregatorServiceDuration,
    /// Timing in milliseconds for processing a message in the metric router service.
    ///
    /// This metric is tagged with:
    ///  - `message`: The type of message that was processed.
    MetricRouterServiceDuration,
    /// Timing in milliseconds for processing a message in the metric store service.
    ///
    /// This metric is tagged with:
    ///  - `message`: The type of message that was processed.
    #[cfg(feature = "processing")]
    StoreServiceDuration,
    /// Timing in milliseconds for the time it takes to initialize the buffer.
    BufferInitialization,
    /// Timing in milliseconds for the time it takes for the buffer to pack & spool a batch.
    ///
    /// Contains the time it takes to pack multiple envelopes into a single memory blob.
    BufferSpool,
    /// Timing in milliseconds for the time it takes for the buffer to spool data to SQLite.
    BufferSqlWrite,
    /// Timing in milliseconds for the time it takes for the buffer to unspool data from disk.
    BufferUnspool,
    /// Timing in milliseconds for the time it takes for the buffer to push.
    BufferPush,
    /// Timing in milliseconds for the time it takes for the buffer to peek.
    BufferPeek,
    /// Timing in milliseconds for the time it takes for the buffer to pop.
    BufferPop,
    /// Timing in milliseconds for the time it takes for the buffer to drain its envelopes.
    BufferDrain,
    /// Timing in milliseconds for the time it takes for an envelope to be serialized.
    BufferEnvelopesSerialization,
    /// Timing in milliseconds for the time it takes for an envelope to be compressed.
    BufferEnvelopeCompression,
    /// Timing in milliseconds for the time it takes for an envelope to be decompressed.
    BufferEnvelopeDecompression,
    /// Timing in milliseconds to count spans in a serialized transaction payload.
    CheckNestedSpans,
    /// The time it needs to create a signature. Includes both the signature used for
    /// trusted relays and for register challenges.
    SignatureCreationDuration,
    /// Time needed to upload an attachment to objectstore.
    ///
    /// This metric is tagged with:
    /// - `type`: "envelope" or "attachment_v2".
    #[cfg(feature = "processing")]
    AttachmentUploadDuration,
}
615
616impl TimerMetric for RelayTimers {
617    fn name(&self) -> &'static str {
618        match self {
619            RelayTimers::EventProcessingDeserialize => "event_processing.deserialize",
620            RelayTimers::EventProcessingNormalization => "event_processing.normalization",
621            RelayTimers::EventProcessingFiltering => "event_processing.filtering",
622            RelayTimers::EventProcessingRateLimiting => "event_processing.rate_limiting",
623            RelayTimers::EventProcessingPii => "event_processing.pii",
624            RelayTimers::EventProcessingSpanMetricsExtraction => {
625                "event_processing.span_metrics_extraction"
626            }
627            RelayTimers::EventProcessingSerialization => "event_processing.serialization",
628            RelayTimers::EnvelopeWaitTime => "event.wait_time",
629            RelayTimers::EnvelopeProcessingTime => "event.processing_time",
630            RelayTimers::EnvelopeTotalTime => "event.total_time",
631            RelayTimers::ProjectStateRequestDuration => "project_state.request.duration",
632            #[cfg(feature = "processing")]
633            RelayTimers::ProjectStateDecompression => "project_state.decompression",
634            RelayTimers::ProjectCacheUpdateLatency => "project_cache.latency",
635            RelayTimers::ProjectCacheFetchDuration => "project_cache.fetch.duration",
636            RelayTimers::RequestsDuration => "requests.duration",
637            RelayTimers::MinidumpScrubbing => "scrubbing.minidumps.duration",
638            RelayTimers::ViewHierarchyScrubbing => "scrubbing.view_hierarchy_scrubbing.duration",
639            RelayTimers::AttachmentScrubbing => "scrubbing.attachments.duration",
640            RelayTimers::UpstreamRequestsDuration => "upstream.requests.duration",
641            RelayTimers::TimestampDelay => "requests.timestamp_delay",
642            RelayTimers::OutcomeAggregatorFlushTime => "outcomes.aggregator.flush_time",
643            RelayTimers::ReplayRecordingProcessing => "replay.recording.process",
644            RelayTimers::GlobalConfigRequestDuration => "global_config.requests.duration",
645            RelayTimers::ProcessMessageDuration => "processor.message.duration",
646            RelayTimers::ProjectCacheTaskDuration => "project_cache.task.duration",
647            RelayTimers::HealthCheckDuration => "health.message.duration",
648            #[cfg(feature = "processing")]
649            RelayTimers::RateLimitBucketsDuration => "processor.rate_limit_buckets",
650            RelayTimers::AggregatorServiceDuration => "metrics.aggregator.message.duration",
651            RelayTimers::MetricRouterServiceDuration => "metrics.router.message.duration",
652            #[cfg(feature = "processing")]
653            RelayTimers::StoreServiceDuration => "store.message.duration",
654            RelayTimers::BufferInitialization => "buffer.initialization.duration",
655            RelayTimers::BufferSpool => "buffer.spool.duration",
656            RelayTimers::BufferSqlWrite => "buffer.write.duration",
657            RelayTimers::BufferUnspool => "buffer.unspool.duration",
658            RelayTimers::BufferPush => "buffer.push.duration",
659            RelayTimers::BufferPeek => "buffer.peek.duration",
660            RelayTimers::BufferPop => "buffer.pop.duration",
661            RelayTimers::BufferDrain => "buffer.drain.duration",
662            RelayTimers::BufferEnvelopesSerialization => "buffer.envelopes_serialization",
663            RelayTimers::BufferEnvelopeCompression => "buffer.envelopes_compression",
664            RelayTimers::BufferEnvelopeDecompression => "buffer.envelopes_decompression",
665            RelayTimers::CheckNestedSpans => "envelope.check_nested_spans",
666            RelayTimers::SignatureCreationDuration => "signature.create.duration",
667            #[cfg(feature = "processing")]
668            RelayTimers::AttachmentUploadDuration => "attachment.upload.duration",
669        }
670    }
671}
672
/// Counter metrics emitted by Relay.
pub enum RelayCounters {
    /// Counts the tasks that the async pool has driven to completion.
    ///
    /// This metric is tagged with:
    /// - `pool`: the name of the pool.
    AsyncPoolFinishedTasks,
    /// Counts events that carried corrupted (unprintable) event attributes.
    ///
    /// At the moment only `environment` and `release` are checked, because some SDKs are known
    /// to send corrupted values for them.
    EventCorrupted,
    /// Counts envelopes accepted in the current time slot.
    ///
    /// An accepted envelope is a request that passed rate limits and filters and has been sent
    /// to the upstream.
    ///
    /// This metric is tagged with:
    ///  - `handling`: `"success"` if the envelope was handled correctly, or `"failure"` if there
    ///    was an error or bug.
    EnvelopeAccepted,
    /// Counts envelopes rejected in the current time slot.
    ///
    /// Rejections cover malformed envelopes as well as any other error during processing
    /// (including filtered events, invalid payloads, and rate limits).
    ///
    /// The rejection reason is not part of this metric; check `events.outcomes` for it.
    ///
    /// This metric is tagged with:
    ///  - `handling`: `"success"` if the envelope was handled correctly, or `"failure"` if there
    ///    was an error or bug.
    EnvelopeRejected,
    /// Counts the total number of envelope items received.
    ///
    /// Note: The count is logical rather than raw, e.g. a single item container contributes
    /// all of the items it contains.
    ///
    /// This metric is tagged with:
    ///  - `item_type`: The type of the items being counted.
    ///  - `is_container`: Whether this item is a container holding multiple items.
    ///  - `sdk`: The name of the Sentry SDK sending the envelope. This tag is only set for
    ///    Sentry's SDKs and defaults to "proprietary".
    EnvelopeItems,
    /// Counts the bytes processed per envelope item.
    ///
    /// This metric is tagged with:
    ///  - `item_type`: The type of the items being counted.
    ///  - `is_container`: Whether this item is a container holding multiple items.
    ///  - `sdk`: The name of the Sentry SDK sending the envelope. This tag is only set for
    ///    Sentry's SDKs and defaults to "proprietary".
    EnvelopeItemBytes,
    /// Counts the attempts to pop an envelope from the buffer.
    BufferTryPop,
    /// Counts the envelopes spooled to disk.
    BufferSpooledEnvelopes,
    /// Counts the envelopes unspooled from disk.
    BufferUnspooledEnvelopes,
    /// Counts the project changed updates received by the buffer.
    BufferProjectChangedEvent,
    /// Counts how often one or more projects of an envelope were still pending when the
    /// envelope was about to be popped.
    BufferProjectPending,
    /// Counts the iterations of the envelope buffer service loop.
    BufferServiceLoopIteration,
    /// Counts outcomes and reasons for rejected envelopes.
    ///
    /// This metric is tagged with:
    ///  - `outcome`: The basic cause for rejecting the event.
    ///  - `reason`: A more detailed identifier describing the rule or mechanism leading to the
    ///    outcome.
    ///  - `to`: The destination of the outcome. Either 'kafka' (when in processing mode) or
    ///    'http' (when outcomes are enabled in an external relay).
    ///
    /// Possible outcomes are:
    ///  - `filtered`: Dropped by inbound data filters. The reason names the filter that
    ///    matched.
    ///  - `rate_limited`: Dropped by an organization, project, or DSN rate limit, or because the
    ///    Sentry plan quota was exceeded. The reason contains the rate limit or quota that was
    ///    exceeded.
    ///  - `invalid`: Data was considered invalid and could not be recovered. The reason names
    ///    the validation that failed.
    Outcomes,
    /// Counts individual outcomes, taking their quantity into account.
    ///
    /// [`RelayCounters::Outcomes`] counts how many times aggregated outcomes have been emitted,
    /// whereas this counter adds up the total quantity of individual outcomes.
    OutcomeQuantity,
    /// Counts project state HTTP requests.
    ///
    /// Relay updates projects in batches. In every update cycle, Relay requests
    /// `limits.max_concurrent_queries` batches of `cache.batch_size` projects from the upstream.
    /// The duration of these requests is reported via `project_state.request.duration`.
    ///
    /// Note that more projects may still be pending updates once an update loop has completed.
    /// This is indicated by `project_state.pending`.
    ProjectStateRequest,
    /// Counts the requests for a project state made to the central Redis cache.
    ///
    /// This metric is tagged with:
    ///  - `hit`: One of:
    ///     - `revision`: the cached version was validated to be up to date using its revision.
    ///     - `project_config`: the request was handled by the cache.
    ///     - `project_config_revision`: the request was handled by the cache and the revision did
    ///       not change.
    ///     - `false`: the request will be sent to the sentry endpoint.
    #[cfg(feature = "processing")]
    ProjectStateRedis,
    /// Counts how often a fetch was scheduled for a project.
    ProjectCacheSchedule,
    /// Counts completed upstream requests for a project config.
    ///
    /// A request completes either because a result was returned or because the config request
    /// was dropped when there still was no response after a timeout. This metric has the tags
    /// `result` and `attempts`, indicating whether it was successful or a timeout and how many
    /// attempts were made, respectively.
    ProjectUpstreamCompleted,
    /// Counts failed upstream requests for a project config.
    ///
    /// A network error is one example of such a failure. Refer to
    /// [`UpstreamRequestError`](crate::services::upstream::UpstreamRequestError) for all cases.
    ProjectUpstreamFailed,
    /// Counts Relay server starts.
    ///
    /// Useful for tracking unwanted restarts caused by crashes or termination.
    ServerStarting,
    /// Counts messages placed on the Kafka queues.
    ///
    /// When Relay operates as Sentry service and an Envelope item is successfully processed, each
    /// Envelope item results in a dedicated message on one of the ingestion topics on Kafka.
    ///
    /// This metric is tagged with:
    ///  - `event_type`: The kind of message produced to Kafka.
    ///  - `namespace` (only for metrics): The namespace that the metric belongs to.
    ///  - `is_segment` (only for event_type span): `true` the span is the root of a segment.
    ///  - `has_parent` (only for event_type span): `false` if the span is the root of a trace.
    ///  - `platform` (only for event_type span): The platform from which the span was sent.
    ///  - `metric_type` (only for event_type metric): The metric type, counter, distribution,
    ///    gauge or set.
    ///  - `metric_encoding` (only for event_type metric): The encoding used for distribution and
    ///    set metrics.
    ///
    /// The message types can be:
    ///
    ///  - `event`: An error or transaction event. Error events are sent to `ingest-events`,
    ///    transactions to `ingest-transactions`, and errors with attachments are sent to
    ///    `ingest-attachments`.
    ///  - `attachment`: An attachment file associated with an error event, sent to
    ///    `ingest-attachments`.
    ///  - `user_report`: A message from the user feedback dialog, sent to `ingest-events`.
    ///  - `session`: A release health session update, sent to `ingest-sessions`.
    #[cfg(feature = "processing")]
    ProcessingMessageProduced,
    /// Counts spans produced in the new format.
    #[cfg(feature = "processing")]
    SpanV2Produced,
    /// Counts events that hit any of the store-like endpoints: Envelope, Store, Security,
    /// Minidump, Unreal.
    ///
    /// Counting happens before the events are rate limited, filtered, or processed in any way.
    ///
    /// This metric is tagged with:
    ///  - `version`: The event protocol version number defaulting to `7`.
    EventProtocol,
    /// Counts processed transaction events by the source of the transaction name.
    ///
    /// This metric is tagged with:
    ///  - `platform`: The event's platform, such as `"javascript"`.
    ///  - `source`: The source of the transaction name on the client. See the [transaction source
    ///    documentation](https://develop.sentry.dev/sdk/event-payloads/properties/transaction_info/)
    ///    for all valid values.
    ///  - `contains_slashes`: Whether the transaction name contains `/`. We use this as a heuristic
    ///    to represent URL transactions.
    EventTransaction,
    /// Counts processed transaction events grouped by transaction name modifications.
    ///
    /// This metric is tagged with:
    ///  - `source_in`: The source of the transaction name before normalization.
    ///    See the [transaction source
    ///    documentation](https://develop.sentry.dev/sdk/event-payloads/properties/transaction_info/)
    ///    for all valid values.
    ///  - `change`: The mechanism that changed the transaction name.
    ///    Either `"none"`, `"pattern"`, `"rule"`, or `"both"`.
    ///  - `source_out`: The source of the transaction name after normalization.
    TransactionNameChanges,
    /// Counts HTTP requests reaching Relay.
    Requests,
    /// Counts completed HTTP requests.
    ///
    /// This metric is tagged with:
    ///
    ///  - `status_code`: The HTTP status code number.
    ///  - `method`: The HTTP method used in the request in uppercase.
    ///  - `route`: Unique dashed identifier of the endpoint.
    ResponsesStatusCodes,
    /// Counts stale projects evicted from the cache.
    ///
    /// Relay scans the in-memory project cache for stale entries at a regular interval
    /// configured by `cache.eviction_interval`.
    ///
    /// The cache duration for project states can be configured with the following options:
    ///
    ///  - `cache.project_expiry`: The time after which a project state counts as expired. It is
    ///    automatically refreshed if a request references the project after it has expired.
    ///  - `cache.project_grace_period`: The time after expiry at which the project state will still
    ///    be used to ingest events. Once the grace period expires, the cache is evicted and new
    ///    requests wait for an update.
    EvictingStaleProjectCaches,
    /// Counts refreshes of stale projects in the cache.
    RefreshStaleProjectCaches,
    /// Counts failures to parse a metrics bucket item from an envelope.
    MetricBucketsParsingFailed,
    /// Counts extractions of transaction names. Tagged with the decision to drop / replace /
    /// use original.
    MetricsTransactionNameExtracted,
    /// Counts events with an OpenTelemetry Context.
    ///
    /// This metric is tagged with:
    ///  - `platform`: The event's platform, such as `"javascript"`.
    ///  - `sdk`: The name of the Sentry SDK sending the transaction. This tag is only set for
    ///    Sentry's SDKs and defaults to "proprietary".
    OpenTelemetryEvent,
    /// Counts global config fetches from upstream. Only 2XX responses are considered; send
    /// errors (e.g. auth or network errors) are ignored.
    ///
    /// This metric is tagged with:
    ///  - `success`: whether deserializing the global config succeeded.
    GlobalConfigFetched,
    /// Counts attachments processed in the same envelope as a user_report_v2 event.
    FeedbackAttachments,
    /// All COGS tracked values.
    ///
    /// This metric is tagged with:
    /// - `resource_id`: The COGS resource id.
    /// - `app_feature`: The COGS app feature.
    CogsUsage,
    /// Counts how often metrics of a project have been flushed without the project being
    /// fetched/available.
    ProjectStateFlushMetricsNoProject,
    /// Incremented whenever a bucket is dropped.
    ///
    /// This should only happen when a project state is invalid during graceful shutdown.
    ///
    /// This metric is tagged with:
    ///  - `aggregator`: The name of the metrics aggregator (usually `"default"`).
    BucketsDropped,
    /// Incremented whenever a segment exceeds the expected limit.
    ReplayExceededSegmentLimit,
    /// Incremented whenever the server accepts a new connection.
    ServerSocketAccept,
    /// Incremented whenever the server aborts a connection because of an idle timeout.
    ServerConnectionIdleTimeout,
    /// The total delay of metric buckets in seconds.
    ///
    /// The delay spans from the initial creation of the bucket in an internal Relay until the
    /// bucket is produced to Kafka.
    ///
    /// Use [`Self::MetricDelayCount`] to calculate the average delay.
    ///
    /// This metric is tagged with:
    /// - `namespace`: the metric namespace.
    #[cfg(feature = "processing")]
    MetricDelaySum,
    /// The number of buckets counted for the [`Self::MetricDelaySum`] metric.
    ///
    /// This metric is tagged with:
    /// - `namespace`: the metric namespace.
    #[cfg(feature = "processing")]
    MetricDelayCount,
    /// Counts how often PlayStation processing was attempted.
    #[cfg(all(sentry, feature = "processing"))]
    PlaystationProcessing,
    /// Counts how often a sampling decision was made.
    ///
    /// This metric is tagged with:
    /// - `item`: what item the decision is taken for (transaction vs span).
    SamplingDecision,
    /// Counts attachment uploads.
    ///
    /// This metric is tagged with:
    /// - `result`: `success` or the failure reason.
    /// - `type`: `envelope` or `attachment_v2`
    #[cfg(feature = "processing")]
    AttachmentUpload,
    /// Whether a logs envelope has a trace context header or not.
    ///
    /// This metric is tagged with:
    /// - `dsc`: yes or no
    /// - `sdk`: low-cardinality client name
    EnvelopeWithLogs,
}
960
961impl CounterMetric for RelayCounters {
962    fn name(&self) -> &'static str {
963        match self {
964            RelayCounters::AsyncPoolFinishedTasks => "async_pool.finished_tasks",
965            RelayCounters::EventCorrupted => "event.corrupted",
966            RelayCounters::EnvelopeAccepted => "event.accepted",
967            RelayCounters::EnvelopeRejected => "event.rejected",
968            RelayCounters::EnvelopeItems => "event.items",
969            RelayCounters::EnvelopeItemBytes => "event.item_bytes",
970            RelayCounters::BufferTryPop => "buffer.try_pop",
971            RelayCounters::BufferSpooledEnvelopes => "buffer.spooled_envelopes",
972            RelayCounters::BufferUnspooledEnvelopes => "buffer.unspooled_envelopes",
973            RelayCounters::BufferProjectChangedEvent => "buffer.project_changed_event",
974            RelayCounters::BufferProjectPending => "buffer.project_pending",
975            RelayCounters::BufferServiceLoopIteration => "buffer.service_loop_iteration",
976            RelayCounters::Outcomes => "events.outcomes",
977            RelayCounters::OutcomeQuantity => "events.outcome_quantity",
978            RelayCounters::ProjectStateRequest => "project_state.request",
979            #[cfg(feature = "processing")]
980            RelayCounters::ProjectStateRedis => "project_state.redis.requests",
981            RelayCounters::ProjectUpstreamCompleted => "project_upstream.completed",
982            RelayCounters::ProjectUpstreamFailed => "project_upstream.failed",
983            RelayCounters::ProjectCacheSchedule => "project_cache.schedule",
984            RelayCounters::ServerStarting => "server.starting",
985            #[cfg(feature = "processing")]
986            RelayCounters::ProcessingMessageProduced => "processing.event.produced",
987            #[cfg(feature = "processing")]
988            RelayCounters::SpanV2Produced => "store.produced.span_v2",
989            RelayCounters::EventProtocol => "event.protocol",
990            RelayCounters::EventTransaction => "event.transaction",
991            RelayCounters::TransactionNameChanges => "event.transaction_name_changes",
992            RelayCounters::Requests => "requests",
993            RelayCounters::ResponsesStatusCodes => "responses.status_codes",
994            RelayCounters::EvictingStaleProjectCaches => "project_cache.eviction",
995            RelayCounters::RefreshStaleProjectCaches => "project_cache.refresh",
996            RelayCounters::MetricBucketsParsingFailed => "metrics.buckets.parsing_failed",
997            RelayCounters::MetricsTransactionNameExtracted => "metrics.transaction_name",
998            RelayCounters::OpenTelemetryEvent => "event.opentelemetry",
999            RelayCounters::GlobalConfigFetched => "global_config.fetch",
1000            RelayCounters::FeedbackAttachments => "processing.feedback_attachments",
1001            RelayCounters::CogsUsage => "cogs.usage",
1002            RelayCounters::ProjectStateFlushMetricsNoProject => "project_state.metrics.no_project",
1003            RelayCounters::BucketsDropped => "metrics.buckets.dropped",
1004            RelayCounters::ReplayExceededSegmentLimit => "replay.segment_limit_exceeded",
1005            RelayCounters::ServerSocketAccept => "server.http.accepted",
1006            RelayCounters::ServerConnectionIdleTimeout => "server.http.idle_timeout",
1007            #[cfg(feature = "processing")]
1008            RelayCounters::MetricDelaySum => "metrics.delay.sum",
1009            #[cfg(feature = "processing")]
1010            RelayCounters::MetricDelayCount => "metrics.delay.count",
1011            #[cfg(all(sentry, feature = "processing"))]
1012            RelayCounters::PlaystationProcessing => "processing.playstation",
1013            RelayCounters::SamplingDecision => "sampling.decision",
1014            #[cfg(feature = "processing")]
1015            RelayCounters::AttachmentUpload => "attachment.upload",
1016            RelayCounters::EnvelopeWithLogs => "logs.envelope",
1017        }
1018    }
1019}