objectstore_server/
config.rs

1//! Configuration for the objectstore server.
2//!
3//! This module provides the configuration system for the objectstore HTTP server. Configuration can
4//! be loaded from multiple sources with the following precedence (highest to lowest):
5//!
6//! 1. Environment variables (prefixed with `OS__`)
7//! 2. YAML configuration file (specified via `-c` or `--config` flag)
8//! 3. Defaults
9//!
10//! See [`Config`] for a description of all configuration fields and their defaults.
11//!
12//! # Environment Variables
13//!
14//! Environment variables use `OS__` as a prefix and double underscores (`__`) to denote nested
15//! configuration structures. For example:
16//!
17//! - `OS__HTTP_ADDR=0.0.0.0:8888` sets the HTTP server address
18//! - `OS__LONG_TERM_STORAGE__TYPE=filesystem` sets the storage type
19//! - `OS__LONG_TERM_STORAGE__PATH=/data` sets the directory name
20//!
21//! # YAML Configuration File
22//!
23//! Configuration can also be provided via a YAML file. The above configuration in YAML format would
24//! look like this:
25//!
26//! ```yaml
27//! http_addr: 0.0.0.0:8888
28//!
29//! long_term_storage:
30//!   type: filesystem
31//!   path: /data
32//! ```
33
34use std::borrow::Cow;
35use std::collections::{BTreeMap, HashSet};
36use std::fmt;
37use std::net::SocketAddr;
38use std::path::{Path, PathBuf};
39use std::time::Duration;
40
41use anyhow::Result;
42use figment::providers::{Env, Format, Serialized, Yaml};
43use objectstore_types::auth::Permission;
44use secrecy::{CloneableSecret, SecretBox, SerializableSecret, zeroize::Zeroize};
45use serde::{Deserialize, Serialize};
46use tracing::level_filters::LevelFilter;
47
48use crate::killswitches::Killswitches;
49use crate::rate_limits::RateLimits;
50
51/// Environment variable prefix for all configuration options.
52const ENV_PREFIX: &str = "OS__";
53
/// Newtype around `String` that helps protect against accidental logging of
/// secrets in our configuration struct. Use with [`secrecy::SecretBox`].
///
/// The [`fmt::Debug`] implementation prints `[redacted]` instead of the value.
/// Note that the derived `Serialize` implementation still writes the plain
/// secret, so serialized configuration must be treated as sensitive.
#[derive(Clone, Default, Serialize, Deserialize, PartialEq)]
pub struct ConfigSecret(String);
59
60impl ConfigSecret {
61    /// Returns the secret value as a string slice.
62    pub fn as_str(&self) -> &str {
63        self.0.as_str()
64    }
65}
66
67impl From<&str> for ConfigSecret {
68    fn from(str: &str) -> Self {
69        ConfigSecret(str.to_string())
70    }
71}
72
73impl std::ops::Deref for ConfigSecret {
74    type Target = str;
75    fn deref(&self) -> &Self::Target {
76        &self.0
77    }
78}
79
80impl fmt::Debug for ConfigSecret {
81    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
82        write!(f, "[redacted]")
83    }
84}
85
86impl CloneableSecret for ConfigSecret {}
87impl SerializableSecret for ConfigSecret {}
88impl Zeroize for ConfigSecret {
89    fn zeroize(&mut self) {
90        self.0.zeroize();
91    }
92}
93
/// Storage backend configuration.
///
/// The `type` field in YAML or `__TYPE` in environment variables determines which variant is used.
/// Variant names are matched in lowercase: `filesystem`, `s3compatible`, `gcs`, `bigtable`.
///
/// Used in: [`Config::high_volume_storage`], [`Config::long_term_storage`]
#[derive(Debug, Deserialize, Serialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Storage {
    /// Local filesystem storage backend (type `"filesystem"`).
    ///
    /// Stores objects as files on the local filesystem. Suitable for development, testing,
    /// and single-server deployments.
    ///
    /// # Example
    ///
    /// ```yaml
    /// long_term_storage:
    ///   type: filesystem
    ///   path: /data
    /// ```
    FileSystem {
        /// Directory path for storing objects.
        ///
        /// The directory will be created if it doesn't exist. Relative paths are resolved from
        /// the server's working directory.
        ///
        /// # Default
        ///
        /// This field has no default of its own. The default [`Config`] uses
        /// `data/high-volume` for high-volume storage and `data/long-term` for long-term
        /// storage (both relative to the server's working directory).
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=filesystem`
        /// - `OS__HIGH_VOLUME_STORAGE__PATH=/path/to/storage`
        ///
        /// Or for long-term storage:
        /// - `OS__LONG_TERM_STORAGE__TYPE=filesystem`
        /// - `OS__LONG_TERM_STORAGE__PATH=/path/to/storage`
        path: PathBuf,
    },

    /// S3-compatible storage backend (type `"s3compatible"`).
    ///
    /// Supports [Amazon S3] and other S3-compatible services. Authentication is handled via
    /// environment variables (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`) or IAM roles.
    ///
    /// [Amazon S3]: https://aws.amazon.com/s3/
    ///
    /// # Example
    ///
    /// ```yaml
    /// long_term_storage:
    ///   type: s3compatible
    ///   endpoint: https://s3.amazonaws.com
    ///   bucket: my-bucket
    /// ```
    S3Compatible {
        /// S3 endpoint URL.
        ///
        /// Examples: `https://s3.amazonaws.com`, `http://localhost:9000` (for MinIO)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=s3compatible`
        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=https://s3.amazonaws.com`
        ///
        /// Or for long-term storage:
        /// - `OS__LONG_TERM_STORAGE__TYPE=s3compatible`
        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=https://s3.amazonaws.com`
        endpoint: String,

        /// S3 bucket name.
        ///
        /// The bucket must exist before starting the server.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-bucket`
        /// - `OS__LONG_TERM_STORAGE__BUCKET=my-bucket`
        bucket: String,
    },

    /// [Google Cloud Storage] backend (type `"gcs"`).
    ///
    /// Stores objects in Google Cloud Storage (GCS). Authentication uses Application Default
    /// Credentials (ADC), which can be provided via the `GOOGLE_APPLICATION_CREDENTIALS`
    /// environment variable or GCE/GKE metadata service.
    ///
    /// **Note**: The bucket must be pre-created with the following lifecycle policy:
    /// - `daysSinceCustomTime`: 1 day
    /// - `action`: delete
    ///
    /// [Google Cloud Storage]: https://cloud.google.com/storage
    ///
    /// # Example
    ///
    /// ```yaml
    /// long_term_storage:
    ///   type: gcs
    ///   bucket: objectstore-bucket
    /// ```
    Gcs {
        /// Optional custom GCS endpoint URL.
        ///
        /// Useful for testing with emulators. If `None`, uses the default GCS endpoint.
        ///
        /// # Default
        ///
        /// `None` (uses default GCS endpoint)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=gcs`
        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=http://localhost:9000` (optional)
        ///
        /// Or for long-term storage:
        /// - `OS__LONG_TERM_STORAGE__TYPE=gcs`
        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=http://localhost:9000` (optional)
        endpoint: Option<String>,

        /// GCS bucket name.
        ///
        /// The bucket must exist before starting the server.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-gcs-bucket`
        /// - `OS__LONG_TERM_STORAGE__BUCKET=my-gcs-bucket`
        bucket: String,
    },

    /// [Google Bigtable] backend (type `"bigtable"`).
    ///
    /// Stores objects in Google Cloud Bigtable, a NoSQL wide-column database. This backend is
    /// optimized for high-throughput, low-latency workloads with small objects. Authentication uses
    /// Application Default Credentials (ADC).
    ///
    /// **Note**: The table must be pre-created with appropriate column families. Make sure the
    /// following column families exist:
    /// - `fg`: timestamp-based garbage collection (`maxage=1s`)
    /// - `fm`: manual garbage collection (`no GC policy`)
    ///
    /// [Google Bigtable]: https://cloud.google.com/bigtable
    ///
    /// # Example
    ///
    /// ```yaml
    /// high_volume_storage:
    ///   type: bigtable
    ///   project_id: my-project
    ///   instance_name: objectstore
    ///   table_name: objectstore
    /// ```
    BigTable {
        /// Optional custom Bigtable endpoint.
        ///
        /// Useful for testing with emulators. If `None`, uses the default Bigtable endpoint.
        ///
        /// # Default
        ///
        /// `None` (uses default Bigtable endpoint)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=bigtable`
        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=localhost:8086` (optional)
        ///
        /// Or for long-term storage:
        /// - `OS__LONG_TERM_STORAGE__TYPE=bigtable`
        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=localhost:8086` (optional)
        endpoint: Option<String>,

        /// GCP project ID.
        ///
        /// The Google project ID (not project number) containing the Bigtable instance.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__PROJECT_ID=my-project`
        /// - `OS__LONG_TERM_STORAGE__PROJECT_ID=my-project`
        project_id: String,

        /// Bigtable instance name.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__INSTANCE_NAME=my-instance`
        /// - `OS__LONG_TERM_STORAGE__INSTANCE_NAME=my-instance`
        instance_name: String,

        /// Bigtable table name.
        ///
        /// The table must exist before starting the server.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TABLE_NAME=objectstore`
        /// - `OS__LONG_TERM_STORAGE__TABLE_NAME=objectstore`
        table_name: String,

        /// Optional number of connections to maintain to Bigtable.
        ///
        /// # Default
        ///
        /// `None` (defaults to 1)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__CONNECTIONS=16` (optional)
        /// - `OS__LONG_TERM_STORAGE__CONNECTIONS=16` (optional)
        connections: Option<usize>,
    },
}
307
/// Runtime configuration for the Tokio async runtime.
///
/// Controls the threading behavior of the server's async runtime. Fields omitted from the
/// configuration fall back to the values of the `Default` implementation.
///
/// Used in: [`Config::runtime`]
#[derive(Debug, Deserialize, Serialize)]
#[serde(default)]
pub struct Runtime {
    /// Number of worker threads for the server runtime.
    ///
    /// This controls the size of the Tokio thread pool used to execute async tasks. More threads
    /// can improve concurrency for CPU-bound workloads, but too many threads can increase context
    /// switching overhead.
    ///
    /// Set this in accordance with the resources available to the server, especially in Kubernetes
    /// environments.
    ///
    /// # Default
    ///
    /// Defaults to the number of CPU cores on the host machine.
    ///
    /// # Environment Variable
    ///
    /// `OS__RUNTIME__WORKER_THREADS`
    ///
    /// # Considerations
    ///
    /// - For I/O-bound workloads, the default (number of CPU cores) is usually sufficient
    /// - For CPU-intensive workloads, consider matching or exceeding the number of cores
    /// - Setting this too high can lead to increased memory usage and context switching
    pub worker_threads: usize,

    /// Interval for reporting internal runtime metrics.
    ///
    /// The value is (de)serialized through `humantime_serde`, so it is written as a
    /// human-readable duration string such as `10s` or `1m` rather than a bare number.
    ///
    /// # Default
    ///
    /// `10s` (10 seconds)
    ///
    /// # Environment Variable
    ///
    /// `OS__RUNTIME__METRICS_INTERVAL`
    #[serde(with = "humantime_serde")]
    pub metrics_interval: Duration,
}
346
347impl Default for Runtime {
348    fn default() -> Self {
349        Self {
350            worker_threads: num_cpus::get(),
351            metrics_interval: Duration::from_secs(10),
352        }
353    }
354}
355
356/// [Sentry](https://sentry.io/) error tracking and performance monitoring configuration.
357///
358/// Configures integration with Sentry for error tracking, performance monitoring, and distributed
359/// tracing. Sentry is disabled by default and only enabled when a DSN is provided.
360///
361/// Used in: [`Config::sentry`]
362#[derive(Debug, Deserialize, Serialize)]
363pub struct Sentry {
364    /// Sentry DSN (Data Source Name).
365    ///
366    /// When set, enables Sentry error tracking and performance monitoring. When `None`, Sentry
367    /// integration is completely disabled.
368    ///
369    /// # Default
370    ///
371    /// `None` (Sentry disabled)
372    ///
373    /// # Environment Variable
374    ///
375    /// `OS__SENTRY__DSN`
376    pub dsn: Option<SecretBox<ConfigSecret>>,
377
378    /// Environment name for this deployment.
379    ///
380    /// Used to distinguish events from different environments (e.g., "production", "staging",
381    /// "development"). This appears in the Sentry UI and can be used for filtering.
382    ///
383    /// # Default
384    ///
385    /// `None`
386    ///
387    /// # Environment Variable
388    ///
389    /// `OS__SENTRY__ENVIRONMENT`
390    pub environment: Option<Cow<'static, str>>,
391
392    /// Server name or identifier.
393    ///
394    /// Used to identify which server instance sent an event. Useful in multi-server deployments for
395    /// tracking which instance encountered an error. Set to the hostname or pod name of the server.
396    ///
397    /// # Default
398    ///
399    /// `None`
400    ///
401    /// # Environment Variable
402    ///
403    /// `OS__SENTRY__SERVER_NAME`
404    pub server_name: Option<Cow<'static, str>>,
405
406    /// Error event sampling rate.
407    ///
408    /// Controls what percentage of error events are sent to Sentry. A value of `1.0` sends all
409    /// errors, while `0.5` sends 50% of errors, and `0.0` sends no errors.
410    ///
411    /// # Default
412    ///
413    /// `1.0` (send all errors)
414    ///
415    /// # Environment Variable
416    ///
417    /// `OS__SENTRY__SAMPLE_RATE`
418    pub sample_rate: f32,
419
420    /// Performance trace sampling rate.
421    ///
422    /// Controls what percentage of transactions (traces) are sent to Sentry for performance
423    /// monitoring. A value of `1.0` sends all traces, while `0.01` sends 1% of traces.
424    ///
425    /// **Important**: Performance traces can generate significant data volume in high-traffic
426    /// systems. Start with a low rate (0.01-0.1) and adjust based on traffic and Sentry quota.
427    ///
428    /// # Default
429    ///
430    /// `0.01` (send 1% of traces)
431    ///
432    /// # Environment Variable
433    ///
434    /// `OS__SENTRY__TRACES_SAMPLE_RATE`
435    pub traces_sample_rate: f32,
436
437    /// Whether to inherit sampling decisions from incoming traces.
438    ///
439    /// When `true` (default), if an incoming request contains a distributed tracing header with a
440    /// sampling decision (e.g., from an upstream service), that decision is honored. When `false`,
441    /// the local `traces_sample_rate` is always used instead.
442    ///
443    /// When this is enabled, the calling service effectively controls the sampling decision for the
444    /// entire trace. Set this to `false` if you want to have independent sampling control at the
445    /// objectstore level.
446    ///
447    /// # Default
448    ///
449    /// `true`
450    ///
451    /// # Environment Variable
452    ///
453    /// `OS__SENTRY__INHERIT_SAMPLING_DECISION`
454    pub inherit_sampling_decision: bool,
455
456    /// Enable Sentry SDK debug mode.
457    ///
458    /// When enabled, the Sentry SDK will output debug information to stderr, which can be useful
459    /// for troubleshooting Sentry integration issues. It is discouraged to enable this in
460    /// production as it generates verbose logging.
461    ///
462    /// # Default
463    ///
464    /// `false`
465    ///
466    /// # Environment Variable
467    ///
468    /// `OS__SENTRY__DEBUG`
469    pub debug: bool,
470
471    /// Additional tags to attach to all Sentry events.
472    ///
473    /// Key-value pairs that are sent as tags with every event reported to Sentry. Useful for adding
474    /// context such as deployment identifiers or environment details.
475    ///
476    /// # Default
477    ///
478    /// Empty (no tags)
479    ///
480    /// # Environment Variables
481    ///
482    /// Each tag is set individually:
483    /// - `OS__SENTRY__TAGS__FOO=foo`
484    /// - `OS__SENTRY__TAGS__BAR=bar`
485    ///
486    /// # YAML Example
487    ///
488    /// ```yaml
489    /// sentry:
490    ///   tags:
491    ///     foo: foo
492    ///     bar: bar
493    /// ```
494    pub tags: BTreeMap<String, String>,
495}
496
497impl Sentry {
498    /// Returns whether Sentry integration is enabled.
499    ///
500    /// Sentry is considered enabled if a DSN is configured.
501    pub fn is_enabled(&self) -> bool {
502        self.dsn.is_some()
503    }
504}
505
506impl Default for Sentry {
507    fn default() -> Self {
508        Self {
509            dsn: None,
510            environment: None,
511            server_name: None,
512            sample_rate: 1.0,
513            traces_sample_rate: 0.01,
514            inherit_sampling_decision: true,
515            debug: false,
516            tags: BTreeMap::new(),
517        }
518    }
519}
520
/// Log output format.
///
/// Controls how log messages are formatted. The format can be explicitly specified or
/// auto-detected based on whether output is to a TTY.
///
/// In configuration, variants are written in lowercase: `auto`, `pretty`, `simplified`, `json`.
///
/// Used in: [`Logging::format`]
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum LogFormat {
    /// Auto detect the best format.
    ///
    /// This chooses [`LogFormat::Pretty`] for TTY, otherwise [`LogFormat::Simplified`].
    Auto,

    /// Pretty printing with colors.
    ///
    /// ```text
    ///  INFO  objectstore::http > objectstore starting
    /// ```
    Pretty,

    /// Simplified plain text output.
    ///
    /// ```text
    /// 2020-12-04T12:10:32Z [objectstore::http] INFO: objectstore starting
    /// ```
    Simplified,

    /// Dump out JSON lines.
    ///
    /// ```text
    /// {"timestamp":"2020-12-04T12:11:08.729716Z","level":"INFO","logger":"objectstore::http","message":"objectstore starting","module_path":"objectstore::http","filename":"objectstore_service/src/http.rs","lineno":31}
    /// ```
    Json,
}
554
/// Error returned when a string cannot be parsed as a [`LogFormat`].
///
/// Wraps the rejected input so it can be echoed back in the error message.
#[derive(Clone, Debug)]
pub struct FormatParseError(String);
558
559impl fmt::Display for FormatParseError {
560    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
561        write!(
562            f,
563            r#"error parsing "{}" as format: expected one of "auto", "pretty", "simplified", "json""#,
564            self.0
565        )
566    }
567}
568
569impl std::str::FromStr for LogFormat {
570    type Err = FormatParseError;
571
572    fn from_str(s: &str) -> Result<Self, Self::Err> {
573        let result = match s {
574            "" => LogFormat::Auto,
575            s if s.eq_ignore_ascii_case("auto") => LogFormat::Auto,
576            s if s.eq_ignore_ascii_case("pretty") => LogFormat::Pretty,
577            s if s.eq_ignore_ascii_case("simplified") => LogFormat::Simplified,
578            s if s.eq_ignore_ascii_case("json") => LogFormat::Json,
579            s => return Err(FormatParseError(s.into())),
580        };
581
582        Ok(result)
583    }
584}
585
// No methods are overridden: the trait's default implementations are used.
impl std::error::Error for FormatParseError {}
587
588mod display_fromstr {
589    pub fn serialize<T, S>(value: &T, serializer: S) -> Result<S::Ok, S::Error>
590    where
591        S: serde::Serializer,
592        T: std::fmt::Display,
593    {
594        serializer.collect_str(&value)
595    }
596
597    pub fn deserialize<'de, T, D>(deserializer: D) -> Result<T, D::Error>
598    where
599        D: serde::Deserializer<'de>,
600        T: std::str::FromStr,
601        <T as std::str::FromStr>::Err: std::fmt::Display,
602    {
603        use serde::Deserialize;
604        let s = <std::borrow::Cow<'de, str>>::deserialize(deserializer)?;
605        s.parse().map_err(serde::de::Error::custom)
606    }
607}
608
609/// Logging configuration.
610///
611/// Controls the verbosity and format of log output. Logs are always written to stderr.
612///
613/// Used in: [`Config::logging`]
614#[derive(Debug, Deserialize, Serialize)]
615pub struct Logging {
616    /// Minimum log level to output.
617    ///
618    /// Controls which log messages are emitted based on their severity. Messages at or above this
619    /// level will be output. Valid levels in increasing severity: TRACE, DEBUG, INFO, WARN, ERROR,
620    /// OFF.
621    ///
622    /// The `RUST_LOG` environment variable provides more granular control per module if needed.
623    ///
624    /// **Important**: Levels `DEBUG` and `TRACE` are very verbose and can impact performance; use
625    /// only for debugging.
626    ///
627    /// # Default
628    ///
629    /// `INFO`
630    ///
631    /// # Environment Variable
632    ///
633    /// `OS__LOGGING__LEVEL`
634    ///
635    /// # Considerations
636    ///
637    /// - `TRACE` and `DEBUG` can be very verbose and impact performance; use only for debugging
638    /// - `INFO` is appropriate for production
639    /// - `WARN` or `ERROR` can be used to reduce log volume in high-traffic systems
640    /// -
641    #[serde(with = "display_fromstr")]
642    pub level: LevelFilter,
643
644    /// Log output format.
645    ///
646    /// Determines how log messages are formatted. See [`LogFormat`] for available options and
647    /// examples.
648    ///
649    /// # Default
650    ///
651    /// `Auto` (pretty for TTY, simplified otherwise)
652    ///
653    /// # Environment Variable
654    ///
655    /// `OS__LOGGING__FORMAT`
656    pub format: LogFormat,
657}
658
659impl Default for Logging {
660    fn default() -> Self {
661        Self {
662            level: LevelFilter::INFO,
663            format: LogFormat::Auto,
664        }
665    }
666}
667
668// Metrics configuration is defined in `objectstore_metrics::MetricsConfig`.
669
/// A key that may be used to verify a request's `Authorization` header and its
/// associated permissions. May contain multiple key versions to facilitate rotation.
///
/// Used in: [`AuthZ::keys`]
#[derive(Debug, Deserialize, Serialize)]
pub struct AuthZVerificationKey {
    /// Files that contain versions of this key's key material which may be used to verify
    /// signatures.
    ///
    /// If a key is being rotated, the old and new versions of that key should both be
    /// configured so objectstore can verify signatures while the updated key is still
    /// rolling out. Otherwise, this should only contain the most recent version of a key.
    ///
    /// This field is required; it has no default.
    pub key_files: Vec<PathBuf>,

    /// The maximum set of permissions that this key's signer is authorized to grant.
    ///
    /// If a request's `Authorization` header grants full permission but it was signed by
    /// a key that is only allowed to grant read permission, then the request only has
    /// read permission.
    ///
    /// # Default
    ///
    /// An empty set when the field is omitted.
    #[serde(default)]
    pub max_permissions: HashSet<Permission>,
}
690
/// Configuration for content-based authorization.
///
/// Used in: [`Config::auth`]
#[derive(Debug, Default, Deserialize, Serialize)]
pub struct AuthZ {
    /// Whether to enforce content-based authorization or not.
    ///
    /// If this is set to `false`, checks are still performed but failures will not result
    /// in `403 Forbidden` responses.
    ///
    /// # Default
    ///
    /// `false` in the `Default` implementation.
    pub enforce: bool,

    /// Keys that may be used to verify a request's `Authorization` header.
    ///
    /// This field is a container that is keyed on a key's ID. When verifying a JWT
    /// from the `Authorization` header, the `kid` field should be read from the JWT
    /// header and used to index into this map to select the appropriate key.
    ///
    /// # Default
    ///
    /// An empty map when the field is omitted.
    #[serde(default)]
    pub keys: BTreeMap<String, AuthZVerificationKey>,
}
708
/// Main configuration struct for the objectstore server.
///
/// This is the top-level configuration that combines all server settings including networking,
/// storage backends, runtime, and observability options.
///
/// Configuration is loaded with the following precedence (highest to lowest):
/// 1. Environment variables (prefixed with `OS__`)
/// 2. YAML configuration file (if provided via `-c` flag)
/// 3. Default values
///
/// See individual field documentation for details on each configuration option, including
/// defaults and environment variables.
#[derive(Debug, Deserialize, Serialize)]
pub struct Config {
    /// HTTP server bind address.
    ///
    /// The socket address (IP and port) where the HTTP server will listen for incoming
    /// connections. Supports both IPv4 and IPv6 addresses. Note that binding to `0.0.0.0`
    /// makes the server accessible from all network interfaces.
    ///
    /// # Default
    ///
    /// `0.0.0.0:8888` (listens on all network interfaces, port 8888)
    ///
    /// # Environment Variable
    ///
    /// `OS__HTTP_ADDR`
    pub http_addr: SocketAddr,

    /// Storage backend for high-volume, small objects.
    ///
    /// This backend is used for smaller objects in scenarios where high-throughput, low-latency
    /// access with many small objects is desired. Good candidates include Bigtable, local
    /// filesystem (for development), or fast SSDs. Can be set to the same backend as
    /// `long_term_storage` for simplicity.
    ///
    /// **Note**: Currently, objects up to 1 MiB are stored in this backend, while larger objects
    /// are stored in the [`long_term_storage`](`Config::long_term_storage`). This is subject to
    /// change in the future and more configuration options will be added to influence this
    /// decision.
    ///
    /// # Default
    ///
    /// Filesystem storage in `./data/high-volume` directory
    ///
    /// # Environment Variables
    ///
    /// - `OS__HIGH_VOLUME_STORAGE__TYPE` - Backend type (filesystem, s3compatible, gcs, bigtable)
    /// - Additional fields depending on the type (see [`Storage`])
    ///
    /// # Example
    ///
    /// ```yaml
    /// high_volume_storage:
    ///   type: bigtable
    ///   project_id: my-project
    ///   instance_name: objectstore
    ///   table_name: objectstore
    /// ```
    pub high_volume_storage: Storage,

    /// Storage backend for large objects with long-term retention.
    ///
    /// This backend is used for larger objects in scenarios where lower throughput and higher
    /// latency are acceptable. Good candidates include S3, Google Cloud Storage, or other object
    /// storage systems. Can be set to the same backend as `high_volume_storage` for simplicity.
    ///
    /// **Note**: Currently, objects over 1 MiB are stored in this backend, while smaller objects
    /// are stored in the [`high_volume_storage`](`Config::high_volume_storage`). This is subject to
    /// change in the future and more configuration options will be added to influence this
    /// decision.
    ///
    /// # Default
    ///
    /// Filesystem storage in `./data/long-term` directory
    ///
    /// # Environment Variables
    ///
    /// - `OS__LONG_TERM_STORAGE__TYPE` - Backend type (filesystem, s3compatible, gcs, bigtable)
    /// - Additional fields depending on the type (see [`Storage`])
    ///
    /// # Example
    ///
    /// ```yaml
    /// long_term_storage:
    ///   type: gcs
    ///   bucket: my-objectstore-bucket
    /// ```
    pub long_term_storage: Storage,

    /// Configuration of the internal task runtime.
    ///
    /// Controls the thread pool size and behavior of the async runtime powering the server.
    /// See [`Runtime`] for configuration options.
    pub runtime: Runtime,

    /// Logging configuration.
    ///
    /// Controls log verbosity and output format. See [`Logging`] for configuration options.
    pub logging: Logging,

    /// Sentry error tracking configuration.
    ///
    /// Optional integration with Sentry for error tracking and performance monitoring.
    /// See [`Sentry`] for configuration options.
    pub sentry: Sentry,

    /// Internal metrics configuration.
    ///
    /// Configures submission of internal metrics to a DogStatsD-compatible endpoint.
    /// See [`objectstore_metrics::MetricsConfig`] for configuration options.
    pub metrics: objectstore_metrics::MetricsConfig,

    /// Content-based authorization configuration.
    ///
    /// Controls the verification and enforcement of content-based access control based on the
    /// JWT in a request's `Authorization` header. See [`AuthZ`] for configuration options.
    pub auth: AuthZ,

    /// A list of matchers for requests to discard without processing.
    ///
    /// See [`Killswitches`] for the available matchers.
    pub killswitches: Killswitches,

    /// Definitions for rate limits to enforce on incoming requests.
    ///
    /// See [`RateLimits`] for configuration options.
    pub rate_limits: RateLimits,

    /// Configuration for the [`StorageService`](objectstore_service::StorageService).
    ///
    /// See [`Service`] for configuration options.
    pub service: Service,

    /// Configuration for the HTTP layer.
    ///
    /// Controls HTTP-level settings that operate before requests reach the
    /// storage service. See [`Http`] for configuration options.
    pub http: Http,
}
843
/// Configuration for the [`StorageService`](objectstore_service::StorageService).
///
/// Controls operational parameters of the storage service layer that sits
/// between the HTTP server and the storage backends. Fields omitted from the
/// configuration fall back to the values of the `Default` implementation.
///
/// Used in: [`Config::service`]
///
/// # Environment Variables
///
/// - `OS__SERVICE__MAX_CONCURRENCY`
#[derive(Debug, Deserialize, Serialize)]
#[serde(default)]
pub struct Service {
    /// Maximum number of concurrent backend operations.
    ///
    /// This caps the total number of in-flight storage operations (reads,
    /// writes, deletes) across all requests. Operations that exceed the limit
    /// are rejected with HTTP 429.
    ///
    /// # Default
    ///
    /// [`DEFAULT_CONCURRENCY_LIMIT`](objectstore_service::service::DEFAULT_CONCURRENCY_LIMIT)
    ///
    /// # Environment Variable
    ///
    /// `OS__SERVICE__MAX_CONCURRENCY`
    pub max_concurrency: usize,
}
868
869impl Default for Service {
870    fn default() -> Self {
871        Self {
872            max_concurrency: objectstore_service::service::DEFAULT_CONCURRENCY_LIMIT,
873        }
874    }
875}
876
/// Default maximum number of concurrent in-flight HTTP requests.
///
/// Requests beyond this limit are rejected with HTTP 503. This is the value
/// used for [`Http::max_requests`] when it is not configured.
pub const DEFAULT_MAX_HTTP_REQUESTS: usize = 10_000;
881
/// Configuration for the HTTP layer.
///
/// Controls behaviour at the HTTP request level, before requests reach the
/// storage service. Grouping these settings separately from [`Service`] keeps
/// HTTP-layer and service-layer concerns distinct and provides a natural home
/// for future HTTP-level settings (e.g. timeouts, body size limits).
///
/// Fields omitted from the configuration fall back to the values of the
/// `Default` implementation.
///
/// Used in: [`Config::http`]
///
/// # Environment Variables
///
/// - `OS__HTTP__MAX_REQUESTS`
#[derive(Debug, Deserialize, Serialize)]
#[serde(default)]
pub struct Http {
    /// Maximum number of concurrent in-flight HTTP requests.
    ///
    /// This is a flood protection limit. When the number of requests currently
    /// being processed reaches this value, new requests are rejected immediately
    /// with HTTP 503. Health and readiness endpoints (`/health`, `/ready`) are
    /// excluded from this limit.
    ///
    /// Unlike readiness-based backpressure, direct rejection responds in
    /// milliseconds and recovers the moment any in-flight request completes.
    ///
    /// # Default
    ///
    /// [`DEFAULT_MAX_HTTP_REQUESTS`]
    ///
    /// # Environment Variable
    ///
    /// `OS__HTTP__MAX_REQUESTS`
    pub max_requests: usize,
}
916
917impl Default for Http {
918    fn default() -> Self {
919        Self {
920            max_requests: DEFAULT_MAX_HTTP_REQUESTS,
921        }
922    }
923}
924
925impl Default for Config {
926    fn default() -> Self {
927        Self {
928            http_addr: "0.0.0.0:8888".parse().unwrap(),
929
930            high_volume_storage: Storage::FileSystem {
931                path: PathBuf::from("data/high-volume"),
932            },
933            long_term_storage: Storage::FileSystem {
934                path: PathBuf::from("data/long-term"),
935            },
936
937            runtime: Runtime::default(),
938            logging: Logging::default(),
939            sentry: Sentry::default(),
940            metrics: objectstore_metrics::MetricsConfig::default(),
941            auth: AuthZ::default(),
942            killswitches: Killswitches::default(),
943            rate_limits: RateLimits::default(),
944            service: Service::default(),
945            http: Http::default(),
946        }
947    }
948}
949
950impl Config {
951    /// Loads configuration from the provided arguments.
952    ///
953    /// Configuration is merged in the following order (later sources override earlier ones):
954    /// 1. Default values
955    /// 2. YAML configuration file (if provided in `args`)
956    /// 3. Environment variables (prefixed with `OS__`)
957    ///
958    /// # Errors
959    ///
960    /// Returns an error if:
961    /// - The YAML configuration file cannot be read or parsed
962    /// - Environment variables contain invalid values
963    /// - Required fields are missing or invalid
964    pub fn load(path: Option<&Path>) -> Result<Self> {
965        let mut figment = figment::Figment::from(Serialized::defaults(Config::default()));
966        if let Some(path) = path {
967            figment = figment.merge(Yaml::file(path));
968        }
969        let config = figment
970            .merge(Env::prefixed(ENV_PREFIX).split("__"))
971            .extract()?;
972
973        Ok(config)
974    }
975}
976
977#[cfg(test)]
978#[expect(
979    clippy::result_large_err,
980    reason = "figment::Error is inherently large"
981)]
982mod tests {
983    use std::io::Write;
984
985    use secrecy::ExposeSecret;
986
987    use crate::killswitches::Killswitch;
988    use crate::rate_limits::{BandwidthLimits, RateLimits, ThroughputLimits, ThroughputRule};
989
990    use super::*;
991
992    #[test]
993    fn configurable_via_env() {
994        figment::Jail::expect_with(|jail| {
995            jail.set_env("OS__LONG_TERM_STORAGE__TYPE", "s3compatible");
996            jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:8888");
997            jail.set_env("OS__LONG_TERM_STORAGE__BUCKET", "whatever");
998            jail.set_env("OS__METRICS__TAGS__FOO", "bar");
999            jail.set_env("OS__METRICS__TAGS__BAZ", "qux");
1000            jail.set_env("OS__SENTRY__DSN", "abcde");
1001            jail.set_env("OS__SENTRY__SAMPLE_RATE", "0.5");
1002            jail.set_env("OS__SENTRY__ENVIRONMENT", "production");
1003            jail.set_env("OS__SENTRY__SERVER_NAME", "objectstore-deadbeef");
1004            jail.set_env("OS__SENTRY__TRACES_SAMPLE_RATE", "0.5");
1005            jail.set_env("OS__SENTRY__TRACES_SAMPLE_RATE", "0.5");
1006
1007            let config = Config::load(None).unwrap();
1008
1009            let Storage::S3Compatible { endpoint, bucket } = &dbg!(&config).long_term_storage
1010            else {
1011                panic!("expected s3 storage");
1012            };
1013            assert_eq!(endpoint, "http://localhost:8888");
1014            assert_eq!(bucket, "whatever");
1015            assert_eq!(
1016                config.metrics.tags,
1017                [("foo".into(), "bar".into()), ("baz".into(), "qux".into())].into()
1018            );
1019
1020            assert_eq!(config.sentry.dsn.unwrap().expose_secret().as_str(), "abcde");
1021            assert_eq!(config.sentry.environment.as_deref(), Some("production"));
1022            assert_eq!(
1023                config.sentry.server_name.as_deref(),
1024                Some("objectstore-deadbeef")
1025            );
1026            assert_eq!(config.sentry.sample_rate, 0.5);
1027            assert_eq!(config.sentry.traces_sample_rate, 0.5);
1028
1029            Ok(())
1030        });
1031    }
1032
1033    #[test]
1034    fn configurable_via_yaml() {
1035        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
1036        tempfile
1037            .write_all(
1038                br#"
1039            long_term_storage:
1040                type: s3compatible
1041                endpoint: http://localhost:8888
1042                bucket: whatever
1043            sentry:
1044                dsn: abcde
1045                environment: production
1046                server_name: objectstore-deadbeef
1047                sample_rate: 0.5
1048                traces_sample_rate: 0.5
1049            "#,
1050            )
1051            .unwrap();
1052
1053        figment::Jail::expect_with(|_jail| {
1054            let config = Config::load(Some(tempfile.path())).unwrap();
1055
1056            let Storage::S3Compatible { endpoint, bucket } = &dbg!(&config).long_term_storage
1057            else {
1058                panic!("expected s3 storage");
1059            };
1060            assert_eq!(endpoint, "http://localhost:8888");
1061            assert_eq!(bucket, "whatever");
1062
1063            assert_eq!(config.sentry.dsn.unwrap().expose_secret().as_str(), "abcde");
1064            assert_eq!(config.sentry.environment.as_deref(), Some("production"));
1065            assert_eq!(
1066                config.sentry.server_name.as_deref(),
1067                Some("objectstore-deadbeef")
1068            );
1069            assert_eq!(config.sentry.sample_rate, 0.5);
1070            assert_eq!(config.sentry.traces_sample_rate, 0.5);
1071
1072            Ok(())
1073        });
1074    }
1075
1076    #[test]
1077    fn configured_with_env_and_yaml() {
1078        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
1079        tempfile
1080            .write_all(
1081                br#"
1082            long_term_storage:
1083                type: s3compatible
1084                endpoint: http://localhost:8888
1085                bucket: whatever
1086            "#,
1087            )
1088            .unwrap();
1089
1090        figment::Jail::expect_with(|jail| {
1091            jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:9001");
1092
1093            let config = Config::load(Some(tempfile.path())).unwrap();
1094
1095            let Storage::S3Compatible {
1096                endpoint,
1097                bucket: _bucket,
1098            } = &dbg!(&config).long_term_storage
1099            else {
1100                panic!("expected s3 storage");
1101            };
1102            // Env should overwrite the yaml config
1103            assert_eq!(endpoint, "http://localhost:9001");
1104
1105            Ok(())
1106        });
1107    }
1108
1109    #[test]
1110    fn metrics_addr_via_env() {
1111        figment::Jail::expect_with(|jail| {
1112            jail.set_env("OS__METRICS__ADDR", "127.0.0.1:8125");
1113
1114            let config = Config::load(None).unwrap();
1115            assert_eq!(config.metrics.addr.as_deref(), Some("127.0.0.1:8125"));
1116
1117            Ok(())
1118        });
1119    }
1120
1121    #[test]
1122    fn configure_auth_with_env() {
1123        figment::Jail::expect_with(|jail| {
1124            jail.set_env("OS__AUTH__ENFORCE", "true");
1125            jail.set_env(
1126                "OS__AUTH__KEYS",
1127                r#"{kid1={key_files=["abcde","fghij","this is a test\n  multiline string\nend of string\n"],max_permissions=["object.read", "object.write"],}, kid2={key_files=["12345"],}}"#,
1128            );
1129
1130            let config = Config::load(None).unwrap();
1131
1132            assert!(config.auth.enforce);
1133
1134            let kid1 = config.auth.keys.get("kid1").unwrap();
1135            assert_eq!(kid1.key_files[0], Path::new("abcde"));
1136            assert_eq!(kid1.key_files[1], Path::new("fghij"));
1137            assert_eq!(
1138                kid1.key_files[2],
1139                Path::new("this is a test\n  multiline string\nend of string\n"),
1140            );
1141            assert_eq!(
1142                kid1.max_permissions,
1143                HashSet::from([Permission::ObjectRead, Permission::ObjectWrite])
1144            );
1145
1146            let kid2 = config.auth.keys.get("kid2").unwrap();
1147            assert_eq!(kid2.key_files[0], Path::new("12345"));
1148            assert_eq!(kid2.max_permissions, HashSet::new());
1149
1150            Ok(())
1151        });
1152    }
1153
1154    #[test]
1155    fn configure_auth_with_yaml() {
1156        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
1157        tempfile
1158            .write_all(
1159                br#"
1160                auth:
1161                    enforce: true
1162                    keys:
1163                        kid1:
1164                            key_files:
1165                                - "abcde"
1166                                - "fghij"
1167                                - |
1168                                  this is a test
1169                                    multiline string
1170                                  end of string
1171                            max_permissions:
1172                                - "object.read"
1173                                - "object.write"
1174                        kid2:
1175                            key_files:
1176                                - "12345"
1177            "#,
1178            )
1179            .unwrap();
1180
1181        figment::Jail::expect_with(|_jail| {
1182            let config = Config::load(Some(tempfile.path())).unwrap();
1183
1184            assert!(config.auth.enforce);
1185
1186            let kid1 = config.auth.keys.get("kid1").unwrap();
1187            assert_eq!(kid1.key_files[0], Path::new("abcde"));
1188            assert_eq!(kid1.key_files[1], Path::new("fghij"));
1189            assert_eq!(
1190                kid1.key_files[2],
1191                Path::new("this is a test\n  multiline string\nend of string\n")
1192            );
1193            assert_eq!(
1194                kid1.max_permissions,
1195                HashSet::from([Permission::ObjectRead, Permission::ObjectWrite])
1196            );
1197
1198            let kid2 = config.auth.keys.get("kid2").unwrap();
1199            assert_eq!(kid2.key_files[0], Path::new("12345"));
1200            assert_eq!(kid2.max_permissions, HashSet::new());
1201
1202            Ok(())
1203        });
1204    }
1205
1206    #[test]
1207    fn configure_killswitches_with_yaml() {
1208        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
1209        tempfile
1210            .write_all(
1211                br#"
1212                killswitches:
1213                  - usecase: broken_usecase
1214                  - scopes:
1215                      org: "42"
1216                  - service: "test-*"
1217                  - scopes:
1218                      org: "42"
1219                      project: "4711"
1220                  - usecase: attachments
1221                    scopes:
1222                      org: "42"
1223                    service: "test-*"
1224                "#,
1225            )
1226            .unwrap();
1227
1228        figment::Jail::expect_with(|_jail| {
1229            let expected = [
1230                Killswitch {
1231                    usecase: Some("broken_usecase".into()),
1232                    scopes: BTreeMap::new(),
1233                    service: None,
1234                    service_matcher: std::sync::OnceLock::new(),
1235                },
1236                Killswitch {
1237                    usecase: None,
1238                    scopes: BTreeMap::from([("org".into(), "42".into())]),
1239                    service: None,
1240                    service_matcher: std::sync::OnceLock::new(),
1241                },
1242                Killswitch {
1243                    usecase: None,
1244                    scopes: BTreeMap::new(),
1245                    service: Some("test-*".into()),
1246                    service_matcher: std::sync::OnceLock::new(),
1247                },
1248                Killswitch {
1249                    usecase: None,
1250                    scopes: BTreeMap::from([
1251                        ("org".into(), "42".into()),
1252                        ("project".into(), "4711".into()),
1253                    ]),
1254                    service: None,
1255                    service_matcher: std::sync::OnceLock::new(),
1256                },
1257                Killswitch {
1258                    usecase: Some("attachments".into()),
1259                    scopes: BTreeMap::from([("org".into(), "42".into())]),
1260                    service: Some("test-*".into()),
1261                    service_matcher: std::sync::OnceLock::new(),
1262                },
1263            ];
1264
1265            let config = Config::load(Some(tempfile.path())).unwrap();
1266            assert_eq!(&config.killswitches.0, &expected,);
1267
1268            Ok(())
1269        });
1270    }
1271
1272    #[test]
1273    fn configure_rate_limits_with_yaml() {
1274        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
1275        tempfile
1276            .write_all(
1277                br#"
1278                rate_limits:
1279                  throughput:
1280                    global_rps: 1000
1281                    burst: 100
1282                    usecase_pct: 50
1283                    scope_pct: 25
1284                    rules:
1285                      - usecase: "high_priority"
1286                        scopes:
1287                          - ["org", "123"]
1288                        rps: 500
1289                      - scopes:
1290                          - ["org", "456"]
1291                          - ["project", "789"]
1292                        pct: 10
1293                  bandwidth:
1294                    global_bps: 1048576
1295                    usecase_pct: 50
1296                    scope_pct: 25
1297                "#,
1298            )
1299            .unwrap();
1300
1301        figment::Jail::expect_with(|_jail| {
1302            let expected = RateLimits {
1303                throughput: ThroughputLimits {
1304                    global_rps: Some(1000),
1305                    burst: 100,
1306                    usecase_pct: Some(50),
1307                    scope_pct: Some(25),
1308                    rules: vec![
1309                        ThroughputRule {
1310                            usecase: Some("high_priority".to_string()),
1311                            scopes: vec![("org".to_string(), "123".to_string())],
1312                            rps: Some(500),
1313                            pct: None,
1314                        },
1315                        ThroughputRule {
1316                            usecase: None,
1317                            scopes: vec![
1318                                ("org".to_string(), "456".to_string()),
1319                                ("project".to_string(), "789".to_string()),
1320                            ],
1321                            rps: None,
1322                            pct: Some(10),
1323                        },
1324                    ],
1325                },
1326                bandwidth: BandwidthLimits {
1327                    global_bps: Some(1_048_576),
1328                    usecase_pct: Some(50),
1329                    scope_pct: Some(25),
1330                },
1331            };
1332
1333            let config = Config::load(Some(tempfile.path())).unwrap();
1334            assert_eq!(config.rate_limits, expected);
1335
1336            Ok(())
1337        });
1338    }
1339}