objectstore_server/
config.rs

1//! Configuration for the objectstore server.
2//!
3//! This module provides the configuration system for the objectstore HTTP server. Configuration can
4//! be loaded from multiple sources with the following precedence (highest to lowest):
5//!
6//! 1. Environment variables (prefixed with `OS__`)
7//! 2. YAML configuration file (specified via `-c` or `--config` flag)
8//! 3. Defaults
9//!
10//! See [`Config`] for a description of all configuration fields and their defaults.
11//!
12//! # Environment Variables
13//!
14//! Environment variables use `OS__` as a prefix and double underscores (`__`) to denote nested
15//! configuration structures. For example:
16//!
17//! - `OS__HTTP_ADDR=0.0.0.0:8888` sets the HTTP server address
18//! - `OS__LONG_TERM_STORAGE__TYPE=filesystem` sets the storage type
//! - `OS__LONG_TERM_STORAGE__PATH=/data` sets the storage directory path
20//!
21//! # YAML Configuration File
22//!
23//! Configuration can also be provided via a YAML file. The above configuration in YAML format would
24//! look like this:
25//!
26//! ```yaml
27//! http_addr: 0.0.0.0:8888
28//!
29//! long_term_storage:
30//!   type: filesystem
31//!   path: /data
32//! ```
33
34use std::borrow::Cow;
35use std::collections::{BTreeMap, HashSet};
36use std::fmt;
37use std::net::SocketAddr;
38use std::path::{Path, PathBuf};
39use std::time::Duration;
40
41use anyhow::Result;
42use figment::providers::{Env, Format, Serialized, Yaml};
43use objectstore_types::Permission;
44use secrecy::{CloneableSecret, SecretBox, SerializableSecret, zeroize::Zeroize};
45use serde::{Deserialize, Serialize};
46use tracing::level_filters::LevelFilter;
47
48use crate::killswitches::Killswitches;
49use crate::rate_limits::RateLimits;
50
/// Prefix shared by every environment variable that configures the server.
///
/// Nested keys follow the prefix and are separated by double underscores, for example
/// `OS__LOGGING__LEVEL`.
const ENV_PREFIX: &str = "OS__";
53
54/// Newtype around `String` that may protect against accidental
55/// logging of secrets in our configuration struct. Use with
56/// [`secrecy::SecretBox`].
57#[derive(Clone, Default, Serialize, Deserialize, PartialEq)]
58pub struct ConfigSecret(String);
59
60impl ConfigSecret {
61    pub fn as_str(&self) -> &str {
62        self.0.as_str()
63    }
64}
65
66impl From<&str> for ConfigSecret {
67    fn from(str: &str) -> Self {
68        ConfigSecret(str.to_string())
69    }
70}
71
72impl std::ops::Deref for ConfigSecret {
73    type Target = str;
74    fn deref(&self) -> &Self::Target {
75        &self.0
76    }
77}
78
79impl fmt::Debug for ConfigSecret {
80    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
81        write!(f, "[redacted]")
82    }
83}
84
85impl CloneableSecret for ConfigSecret {}
86impl SerializableSecret for ConfigSecret {}
87impl Zeroize for ConfigSecret {
88    fn zeroize(&mut self) {
89        self.0.zeroize();
90    }
91}
92
93/// Storage backend configuration.
94///
95/// The `type` field in YAML or `__TYPE` in environment variables determines which variant is used.
96///
97/// Used in: [`Config::high_volume_storage`], [`Config::long_term_storage`]
98#[derive(Debug, Deserialize, Serialize)]
99#[serde(tag = "type", rename_all = "lowercase")]
100pub enum Storage {
101    /// Local filesystem storage backend (type `"filesystem"`).
102    ///
103    /// Stores objects as files on the local filesystem. Suitable for development, testing,
104    /// and single-server deployments.
105    ///
106    /// # Example
107    ///
108    /// ```yaml
109    /// long_term_storage:
110    ///   type: filesystem
111    ///   path: /data
112    /// ```
113    FileSystem {
114        /// Directory path for storing objects.
115        ///
116        /// The directory will be created if it doesn't exist. Relative paths are resolved from
117        /// the server's working directory.
118        ///
119        /// # Default
120        ///
121        /// `"data"` (relative to the server's working directory)
122        ///
123        /// # Environment Variables
124        ///
125        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=filesystem`
126        /// - `OS__HIGH_VOLUME_STORAGE__PATH=/path/to/storage`
127        ///
128        /// Or for long-term storage:
129        /// - `OS__LONG_TERM_STORAGE__TYPE=filesystem`
130        /// - `OS__LONG_TERM_STORAGE__PATH=/path/to/storage`
131        path: PathBuf,
132    },
133
134    /// S3-compatible storage backend (type `"s3compatible"`).
135    ///
136    /// Supports [Amazon S3] and other S3-compatible services. Authentication is handled via
137    /// environment variables (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`) or IAM roles.
138    ///
139    /// [Amazon S3]: https://aws.amazon.com/s3/
140    ///
141    /// # Example
142    ///
143    /// ```yaml
144    /// long_term_storage:
145    ///   type: s3compatible
146    ///   endpoint: https://s3.amazonaws.com
147    ///   bucket: my-bucket
148    /// ```
149    S3Compatible {
150        /// S3 endpoint URL.
151        ///
152        /// Examples: `https://s3.amazonaws.com`, `http://localhost:9000` (for MinIO)
153        ///
154        /// # Environment Variables
155        ///
156        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=s3compatible`
157        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=https://s3.amazonaws.com`
158        ///
159        /// Or for long-term storage:
160        /// - `OS__LONG_TERM_STORAGE__TYPE=s3compatible`
161        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=https://s3.amazonaws.com`
162        endpoint: String,
163
164        /// S3 bucket name.
165        ///
166        /// The bucket must exist before starting the server.
167        ///
168        /// # Environment Variables
169        ///
170        /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-bucket`
171        /// - `OS__LONG_TERM_STORAGE__BUCKET=my-bucket`
172        bucket: String,
173    },
174
175    /// [Google Cloud Storage] backend (type `"gcs"`).
176    ///
177    /// Stores objects in Google Cloud Storage (GCS). Authentication uses Application Default
178    /// Credentials (ADC), which can be provided via the `GOOGLE_APPLICATION_CREDENTIALS`
179    /// environment variable or GCE/GKE metadata service.
180    ///
181    /// **Note**: The bucket must be pre-created with the following lifecycle policy:
182    /// - `daysSinceCustomTime`: 1 day
183    /// - `action`: delete
184    ///
185    /// [Google Cloud Storage]: https://cloud.google.com/storage
186    ///
187    /// # Example
188    ///
189    /// ```yaml
190    /// long_term_storage:
191    ///   type: gcs
192    ///   bucket: objectstore-bucket
193    /// ```
194    Gcs {
195        /// Optional custom GCS endpoint URL.
196        ///
197        /// Useful for testing with emulators. If `None`, uses the default GCS endpoint.
198        ///
199        /// # Default
200        ///
201        /// `None` (uses default GCS endpoint)
202        ///
203        /// # Environment Variables
204        ///
205        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=gcs`
206        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=http://localhost:9000` (optional)
207        ///
208        /// Or for long-term storage:
209        /// - `OS__LONG_TERM_STORAGE__TYPE=gcs`
210        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=http://localhost:9000` (optional)
211        endpoint: Option<String>,
212
213        /// GCS bucket name.
214        ///
215        /// The bucket must exist before starting the server.
216        ///
217        /// # Environment Variables
218        ///
219        /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-gcs-bucket`
220        /// - `OS__LONG_TERM_STORAGE__BUCKET=my-gcs-bucket`
221        bucket: String,
222    },
223
224    /// [Google Bigtable] backend (type `"bigtable"`).
225    ///
226    /// Stores objects in Google Cloud Bigtable, a NoSQL wide-column database. This backend is
227    /// optimized for high-throughput, low-latency workloads with small objects. Authentication uses
228    /// Application Default Credentials (ADC).
229    ///
230    /// **Note**: The table must be pre-created with appropriate column families. Ensure to have the
231    /// following column families:
232    /// - `fg`: timestamp-based garbage collection (`maxage=1s`)
233    /// - `fm`: manual garbage collection (`no GC policy`)
234    ///
235    /// [Google Bigtable]: https://cloud.google.com/bigtable
236    ///
237    /// # Example
238    ///
239    /// ```yaml
240    /// high_volume_storage:
241    ///   type: bigtable
242    ///   project_id: my-project
243    ///   instance_name: objectstore
244    ///   table_name: objectstore
245    /// ```
246    BigTable {
247        /// Optional custom Bigtable endpoint.
248        ///
249        /// Useful for testing with emulators. If `None`, uses the default Bigtable endpoint.
250        ///
251        /// # Default
252        ///
253        /// `None` (uses default Bigtable endpoint)
254        ///
255        /// # Environment Variables
256        ///
257        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=bigtable`
258        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=localhost:8086` (optional)
259        ///
260        /// Or for long-term storage:
261        /// - `OS__LONG_TERM_STORAGE__TYPE=bigtable`
262        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=localhost:8086` (optional)
263        endpoint: Option<String>,
264
265        /// GCP project ID.
266        ///
267        /// The Google project ID (not project number) containing the Bigtable instance.
268        ///
269        /// # Environment Variables
270        ///
271        /// - `OS__HIGH_VOLUME_STORAGE__PROJECT_ID=my-project`
272        /// - `OS__LONG_TERM_STORAGE__PROJECT_ID=my-project`
273        project_id: String,
274
275        /// Bigtable instance name.
276        ///
277        /// # Environment Variables
278        ///
279        /// - `OS__HIGH_VOLUME_STORAGE__INSTANCE_NAME=my-instance`
280        /// - `OS__LONG_TERM_STORAGE__INSTANCE_NAME=my-instance`
281        instance_name: String,
282
283        /// Bigtable table name.
284        ///
285        /// The table must exist before starting the server.
286        ///
287        /// # Environment Variables
288        ///
289        /// - `OS__HIGH_VOLUME_STORAGE__TABLE_NAME=objectstore`
290        /// - `OS__LONG_TERM_STORAGE__TABLE_NAME=objectstore`
291        table_name: String,
292
293        /// Optional number of connections to maintain to Bigtable.
294        ///
295        /// # Default
296        ///
297        /// `None` (infers connection count based on CPU count)
298        ///
299        /// # Environment Variables
300        ///
301        /// - `OS__HIGH_VOLUME_STORAGE__CONNECTIONS=16` (optional)
302        /// - `OS__LONG_TERM_STORAGE__CONNECTIONS=16` (optional)
303        connections: Option<usize>,
304    },
305}
306
307/// Runtime configuration for the Tokio async runtime.
308///
309/// Controls the threading behavior of the server's async runtime.
310///
311/// Used in: [`Config::runtime`]
312#[derive(Debug, Deserialize, Serialize)]
313#[serde(default)]
314pub struct Runtime {
315    /// Number of worker threads for the server runtime.
316    ///
317    /// This controls the size of the Tokio thread pool used to execute async tasks. More threads
318    /// can improve concurrency for CPU-bound workloads, but too many threads can increase context
319    /// switching overhead.
320    ///
321    /// Set this in accordance with the resources available to the server, especially in Kubernetes
322    /// environments.
323    ///
324    /// # Default
325    ///
326    /// Defaults to the number of CPU cores on the host machine.
327    ///
328    /// # Environment Variable
329    ///
330    /// `OS__RUNTIME__WORKER_THREADS`
331    ///
332    /// # Considerations
333    ///
334    /// - For I/O-bound workloads, the default (number of CPU cores) is usually sufficient
335    /// - For CPU-intensive workloads, consider matching or exceeding the number of cores
336    /// - Setting this too high can lead to increased memory usage and context switching
337    pub worker_threads: usize,
338
339    /// Interval in seconds for reporting internal runtime metrics.
340    ///
341    /// Defaults to `10` seconds.
342    #[serde(with = "humantime_serde")]
343    pub metrics_interval: Duration,
344}
345
346impl Default for Runtime {
347    fn default() -> Self {
348        Self {
349            worker_threads: num_cpus::get(),
350            metrics_interval: Duration::from_secs(10),
351        }
352    }
353}
354
355/// [Sentry](https://sentry.io/) error tracking and performance monitoring configuration.
356///
357/// Configures integration with Sentry for error tracking, performance monitoring, and distributed
358/// tracing. Sentry is disabled by default and only enabled when a DSN is provided.
359///
360/// Used in: [`Config::sentry`]
361#[derive(Debug, Deserialize, Serialize)]
362pub struct Sentry {
363    /// Sentry DSN (Data Source Name).
364    ///
365    /// When set, enables Sentry error tracking and performance monitoring. When `None`, Sentry
366    /// integration is completely disabled.
367    ///
368    /// # Default
369    ///
370    /// `None` (Sentry disabled)
371    ///
372    /// # Environment Variable
373    ///
374    /// `OS__SENTRY__DSN`
375    pub dsn: Option<SecretBox<ConfigSecret>>,
376
377    /// Environment name for this deployment.
378    ///
379    /// Used to distinguish events from different environments (e.g., "production", "staging",
380    /// "development"). This appears in the Sentry UI and can be used for filtering.
381    ///
382    /// # Default
383    ///
384    /// `None`
385    ///
386    /// # Environment Variable
387    ///
388    /// `OS__SENTRY__ENVIRONMENT`
389    pub environment: Option<Cow<'static, str>>,
390
391    /// Server name or identifier.
392    ///
393    /// Used to identify which server instance sent an event. Useful in multi-server deployments for
394    /// tracking which instance encountered an error. Set to the hostname or pod name of the server.
395    ///
396    /// # Default
397    ///
398    /// `None`
399    ///
400    /// # Environment Variable
401    ///
402    /// `OS__SENTRY__SERVER_NAME`
403    pub server_name: Option<Cow<'static, str>>,
404
405    /// Error event sampling rate.
406    ///
407    /// Controls what percentage of error events are sent to Sentry. A value of `1.0` sends all
408    /// errors, while `0.5` sends 50% of errors, and `0.0` sends no errors.
409    ///
410    /// # Default
411    ///
412    /// `1.0` (send all errors)
413    ///
414    /// # Environment Variable
415    ///
416    /// `OS__SENTRY__SAMPLE_RATE`
417    pub sample_rate: f32,
418
419    /// Performance trace sampling rate.
420    ///
421    /// Controls what percentage of transactions (traces) are sent to Sentry for performance
422    /// monitoring. A value of `1.0` sends all traces, while `0.01` sends 1% of traces.
423    ///
424    /// **Important**: Performance traces can generate significant data volume in high-traffic
425    /// systems. Start with a low rate (0.01-0.1) and adjust based on traffic and Sentry quota.
426    ///
427    /// # Default
428    ///
429    /// `0.01` (send 1% of traces)
430    ///
431    /// # Environment Variable
432    ///
433    /// `OS__SENTRY__TRACES_SAMPLE_RATE`
434    pub traces_sample_rate: f32,
435
436    /// Whether to inherit sampling decisions from incoming traces.
437    ///
438    /// When `true` (default), if an incoming request contains a distributed tracing header with a
439    /// sampling decision (e.g., from an upstream service), that decision is honored. When `false`,
440    /// the local `traces_sample_rate` is always used instead.
441    ///
442    /// When this is enabled, the calling service effectively controls the sampling decision for the
443    /// entire trace. Set this to `false` if you want to have independent sampling control at the
444    /// objectstore level.
445    ///
446    /// # Default
447    ///
448    /// `true`
449    ///
450    /// # Environment Variable
451    ///
452    /// `OS__SENTRY__INHERIT_SAMPLING_DECISION`
453    pub inherit_sampling_decision: bool,
454
455    /// Enable Sentry SDK debug mode.
456    ///
457    /// When enabled, the Sentry SDK will output debug information to stderr, which can be useful
458    /// for troubleshooting Sentry integration issues. It is discouraged to enable this in
459    /// production as it generates verbose logging.
460    ///
461    /// # Default
462    ///
463    /// `false`
464    ///
465    /// # Environment Variable
466    ///
467    /// `OS__SENTRY__DEBUG`
468    pub debug: bool,
469
470    /// Additional tags to attach to all Sentry events.
471    ///
472    /// Key-value pairs that are sent as tags with every event reported to Sentry. Useful for adding
473    /// context such as deployment identifiers or environment details.
474    ///
475    /// # Default
476    ///
477    /// Empty (no tags)
478    ///
479    /// # Environment Variables
480    ///
481    /// Each tag is set individually:
482    /// - `OS__SENTRY__TAGS__FOO=foo`
483    /// - `OS__SENTRY__TAGS__BAR=bar`
484    ///
485    /// # YAML Example
486    ///
487    /// ```yaml
488    /// sentry:
489    ///   tags:
490    ///     foo: foo
491    ///     bar: bar
492    /// ```
493    pub tags: BTreeMap<String, String>,
494}
495
496impl Sentry {
497    /// Returns whether Sentry integration is enabled.
498    ///
499    /// Sentry is considered enabled if a DSN is configured.
500    pub fn is_enabled(&self) -> bool {
501        self.dsn.is_some()
502    }
503}
504
505impl Default for Sentry {
506    fn default() -> Self {
507        Self {
508            dsn: None,
509            environment: None,
510            server_name: None,
511            sample_rate: 1.0,
512            traces_sample_rate: 0.01,
513            inherit_sampling_decision: true,
514            debug: false,
515            tags: BTreeMap::new(),
516        }
517    }
518}
519
520/// Log output format.
521///
522/// Controls how log messages are formatted. The format can be explicitly specified or
523/// auto-detected based on whether output is to a TTY.
524#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Deserialize, Serialize)]
525#[serde(rename_all = "lowercase")]
526pub enum LogFormat {
527    /// Auto detect the best format.
528    ///
529    /// This chooses [`LogFormat::Pretty`] for TTY, otherwise [`LogFormat::Simplified`].
530    Auto,
531
532    /// Pretty printing with colors.
533    ///
534    /// ```text
535    ///  INFO  objectstore::http > objectstore starting
536    /// ```
537    Pretty,
538
539    /// Simplified plain text output.
540    ///
541    /// ```text
542    /// 2020-12-04T12:10:32Z [objectstore::http] INFO: objectstore starting
543    /// ```
544    Simplified,
545
546    /// Dump out JSON lines.
547    ///
548    /// ```text
549    /// {"timestamp":"2020-12-04T12:11:08.729716Z","level":"INFO","logger":"objectstore::http","message":"objectstore starting","module_path":"objectstore::http","filename":"objectstore_service/src/http.rs","lineno":31}
550    /// ```
551    Json,
552}
553
/// Error returned when a string does not name a known log format.
///
/// Carries the rejected input so that it can be echoed in the error message.
#[derive(Debug, Clone)]
pub struct FormatParseError(String);
557
558impl fmt::Display for FormatParseError {
559    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
560        write!(
561            f,
562            r#"error parsing "{}" as format: expected one of "auto", "pretty", "simplified", "json""#,
563            self.0
564        )
565    }
566}
567
568impl std::str::FromStr for LogFormat {
569    type Err = FormatParseError;
570
571    fn from_str(s: &str) -> Result<Self, Self::Err> {
572        let result = match s {
573            "" => LogFormat::Auto,
574            s if s.eq_ignore_ascii_case("auto") => LogFormat::Auto,
575            s if s.eq_ignore_ascii_case("pretty") => LogFormat::Pretty,
576            s if s.eq_ignore_ascii_case("simplified") => LogFormat::Simplified,
577            s if s.eq_ignore_ascii_case("json") => LogFormat::Json,
578            s => return Err(FormatParseError(s.into())),
579        };
580
581        Ok(result)
582    }
583}
584
585impl std::error::Error for FormatParseError {}
586
587mod display_fromstr {
588    pub fn serialize<T, S>(value: &T, serializer: S) -> Result<S::Ok, S::Error>
589    where
590        S: serde::Serializer,
591        T: std::fmt::Display,
592    {
593        serializer.collect_str(&value)
594    }
595
596    pub fn deserialize<'de, T, D>(deserializer: D) -> Result<T, D::Error>
597    where
598        D: serde::Deserializer<'de>,
599        T: std::str::FromStr,
600        <T as std::str::FromStr>::Err: std::fmt::Display,
601    {
602        use serde::Deserialize;
603        let s = <std::borrow::Cow<'de, str>>::deserialize(deserializer)?;
604        s.parse().map_err(serde::de::Error::custom)
605    }
606}
607
608/// Logging configuration.
609///
610/// Controls the verbosity and format of log output. Logs are always written to stderr.
611///
612/// Used in: [`Config::logging`]
613#[derive(Debug, Deserialize, Serialize)]
614pub struct Logging {
615    /// Minimum log level to output.
616    ///
617    /// Controls which log messages are emitted based on their severity. Messages at or above this
618    /// level will be output. Valid levels in increasing severity: TRACE, DEBUG, INFO, WARN, ERROR,
619    /// OFF.
620    ///
621    /// The `RUST_LOG` environment variable provides more granular control per module if needed.
622    ///
623    /// **Important**: Levels `DEBUG` and `TRACE` are very verbose and can impact performance; use
624    /// only for debugging.
625    ///
626    /// # Default
627    ///
628    /// `INFO`
629    ///
630    /// # Environment Variable
631    ///
632    /// `OS__LOGGING__LEVEL`
633    ///
634    /// # Considerations
635    ///
636    /// - `TRACE` and `DEBUG` can be very verbose and impact performance; use only for debugging
637    /// - `INFO` is appropriate for production
638    /// - `WARN` or `ERROR` can be used to reduce log volume in high-traffic systems
639    /// -
640    #[serde(with = "display_fromstr")]
641    pub level: LevelFilter,
642
643    /// Log output format.
644    ///
645    /// Determines how log messages are formatted. See [`LogFormat`] for available options and
646    /// examples.
647    ///
648    /// # Default
649    ///
650    /// `Auto` (pretty for TTY, simplified otherwise)
651    ///
652    /// # Environment Variable
653    ///
654    /// `OS__LOGGING__FORMAT`
655    pub format: LogFormat,
656}
657
658impl Default for Logging {
659    fn default() -> Self {
660        Self {
661            level: LevelFilter::INFO,
662            format: LogFormat::Auto,
663        }
664    }
665}
666
667/// Metrics configuration.
668///
669/// Configures submission of internal metrics to Datadog.
670#[derive(Debug, Default, Deserialize, Serialize)]
671pub struct Metrics {
672    /// Datadog [API key] for metrics.
673    ///
674    /// When provided, enables metrics reporting to Datadog. Metrics include request counts,
675    /// latencies, storage operations, and more. The key is kept secret and redacted from logs.
676    ///
677    /// # Default
678    ///
679    /// `None` (Datadog metrics disabled)
680    ///
681    /// # Environment Variable
682    ///
683    /// `OS__METRICS__DATADOG_KEY`
684    ///
685    /// [API key]: https://docs.datadoghq.com/account_management/api-app-keys/#api-keys
686    pub datadog_key: Option<SecretBox<ConfigSecret>>,
687
688    /// Global tags applied to all metrics.
689    ///
690    /// Key-value pairs that are attached to every metric sent to Datadog. Useful for
691    /// identifying the environment, region, or other deployment-specific information.
692    ///
693    /// # Default
694    ///
695    /// Empty (no tags)
696    ///
697    /// # Environment Variables
698    ///
699    /// Each tag is set individually:
700    /// - `OS__METRICS__TAGS__FOO=foo`
701    /// - `OS__METRICS__TAGS__BAR=bar`
702    ///
703    /// # YAML Example
704    ///
705    /// ```yaml
706    /// metrics:
707    ///   tags:
708    ///     foo: foo
709    ///     bar: bar
710    /// ```
711    pub tags: BTreeMap<String, String>,
712}
713
714/// A key that may be used to verify a request's `Authorization` header and its
715/// associated permissions. May contain multiple key versions to facilitate rotation.
716#[derive(Debug, Deserialize, Serialize)]
717pub struct AuthZVerificationKey {
718    /// Files that contain versions of this key's key material which may be used to verify
719    /// signatures.
720    ///
721    /// If a key is being rotated, the old and new versions of that key should both be
722    /// configured so objectstore can verify signatures while the updated key is still
723    /// rolling out. Otherwise, this should only contain the most recent version of a key.
724    pub key_files: Vec<PathBuf>,
725
726    /// The maximum set of permissions that this key's signer is authorized to grant.
727    ///
728    /// If a request's `Authorization` header grants full permission but it was signed by
729    /// a key that is only allowed to grant read permission, then the request only has
730    /// read permission.
731    #[serde(default)]
732    pub max_permissions: HashSet<Permission>,
733}
734
735/// Configuration for content-based authorization.
736#[derive(Debug, Default, Deserialize, Serialize)]
737pub struct AuthZ {
738    /// Whether to enforce content-based authorization or not.
739    ///
740    /// If this is set to `false`, checks are still performed but failures will not result
741    /// in `403 Unauthorized` responses.
742    pub enforce: bool,
743
744    /// Keys that may be used to verify a request's `Authorization` header.
745    ///
746    /// This field is a container that is keyed on a key's ID. When verifying a JWT
747    /// from the `Authorization` header, the `kid` field should be read from the JWT
748    /// header and used to index into this map to select the appropriate key.
749    #[serde(default)]
750    pub keys: BTreeMap<String, AuthZVerificationKey>,
751}
752
753/// Main configuration struct for the objectstore server.
754///
755/// This is the top-level configuration that combines all server settings including networking,
756/// storage backends, runtime, and observability options.
757///
758/// Configuration is loaded with the following precedence (highest to lowest):
759/// 1. Environment variables (prefixed with `OS__`)
760/// 2. YAML configuration file (if provided via `-c` flag)
761/// 3. Default values
762///
763/// See individual field documentation for details on each configuration option, including
764/// defaults and environment variables.
765#[derive(Debug, Deserialize, Serialize)]
766pub struct Config {
767    /// HTTP server bind address.
768    ///
769    /// The socket address (IP and port) where the HTTP server will listen for incoming
770    /// connections. Supports both IPv4 and IPv6 addresses. Note that binding to `0.0.0.0`
771    /// makes the server accessible from all network interfaces.
772    ///
773    /// # Default
774    ///
775    /// `0.0.0.0:8888` (listens on all network interfaces, port 8888)
776    ///
777    /// # Environment Variable
778    ///
779    /// `OS__HTTP_ADDR`
780    pub http_addr: SocketAddr,
781
782    /// Storage backend for high-volume, small objects.
783    ///
784    /// This backend is used for smaller objects in scenarios where high-throughput, low-latency
785    /// access with many small objects is desired. Good candidates include Bigtable, local
786    /// filesystem (for development), or fast SSDs. Can be set to the same backend as
787    /// `long_term_storage` for simplicity.
788    ///
789    /// **Note**: Currently, objects up to 1 MiB are stored in this backend, while larger objects
790    /// are stored in the [`long_term_storage`](`Config::long_term_storage`). This is subject to
791    /// change in the future and more configuration options will be added to influence this
792    /// decision.
793    ///
794    /// # Default
795    ///
796    /// Filesystem storage in `./data/high-volume` directory
797    ///
798    /// # Environment Variables
799    ///
800    /// - `OS__HIGH_VOLUME_STORAGE__TYPE` for the backend type. See [`Storage`] for available
801    ///   options.
802    ///
803    /// # Example
804    ///
805    /// ```yaml
806    /// high_volume_storage:
807    ///   type: bigtable
808    ///   project_id: my-project
809    ///   instance_name: objectstore
810    ///   table_name: objectstore
811    /// ```
812    pub high_volume_storage: Storage,
813
814    /// Storage backend for large objects with long-term retention.
815    ///
816    /// This backend is used for larger objects in scenarios with lower throughput and higher
817    /// latency requirements. Good candidates include S3, Google Cloud Storage, or other object
818    /// storage systems. Can be set to the same backend as `high_volume_storage` for simplicity.
819    ///
820    /// **Note**: Currently, objects over 1 MiB are stored in this backend, while smaller objects
821    /// are stored in the [`high_volume_storage`](`Config::high_volume_storage`). This is subject to
822    /// change in the future and more configuration options will be added to influence this
823    /// decision.
824    ///
825    /// # Default
826    ///
827    /// Filesystem storage in `./data/long-term` directory
828    ///
829    /// # Environment Variables
830    ///
831    /// - `OS__LONG_TERM_STORAGE__TYPE` - Backend type (filesystem, s3compatible, gcs, bigtable)
832    /// - Additional fields depending on the type (see [`Storage`])
833    ///
834    /// # Example
835    ///
836    /// ```yaml
837    /// long_term_storage:
838    ///   type: gcs
839    ///   bucket: my-objectstore-bucket
840    /// ```
841    pub long_term_storage: Storage,
842
843    /// Configuration of the internal task runtime.
844    ///
845    /// Controls the thread pool size and behavior of the async runtime powering the server.
846    /// See [`Runtime`] for configuration options.
847    pub runtime: Runtime,
848
849    /// Logging configuration.
850    ///
851    /// Controls log verbosity and output format. See [`Logging`] for configuration options.
852    pub logging: Logging,
853
854    /// Sentry error tracking configuration.
855    ///
856    /// Optional integration with Sentry for error tracking and performance monitoring.
857    /// See [`Sentry`] for configuration options.
858    pub sentry: Sentry,
859
860    /// Internal metrics configuration.
861    ///
862    /// Optional configuration for submitting internal metrics to Datadog. See [`Metrics`] for
863    /// configuration options.
864    pub metrics: Metrics,
865
866    /// Content-based authorization configuration.
867    ///
868    /// Controls the verification and enforcement of content-based access control based on the
869    /// JWT in a request's `Authorization` header.
870    pub auth: AuthZ,
871
872    /// A list of matchers for requests to discard without processing.
873    pub killswitches: Killswitches,
874
875    /// Definitions for rate limits to enforce on incoming requests.
876    pub rate_limits: RateLimits,
877}
878
879impl Default for Config {
880    fn default() -> Self {
881        Self {
882            http_addr: "0.0.0.0:8888".parse().unwrap(),
883
884            high_volume_storage: Storage::FileSystem {
885                path: PathBuf::from("data/high-volume"),
886            },
887            long_term_storage: Storage::FileSystem {
888                path: PathBuf::from("data/long-term"),
889            },
890
891            runtime: Runtime::default(),
892            logging: Logging::default(),
893            sentry: Sentry::default(),
894            metrics: Metrics::default(),
895            auth: AuthZ::default(),
896            killswitches: Killswitches::default(),
897            rate_limits: RateLimits::default(),
898        }
899    }
900}
901
902impl Config {
903    /// Loads configuration from the provided arguments.
904    ///
905    /// Configuration is merged in the following order (later sources override earlier ones):
906    /// 1. Default values
907    /// 2. YAML configuration file (if provided in `args`)
908    /// 3. Environment variables (prefixed with `OS__`)
909    ///
910    /// # Errors
911    ///
912    /// Returns an error if:
913    /// - The YAML configuration file cannot be read or parsed
914    /// - Environment variables contain invalid values
915    /// - Required fields are missing or invalid
916    pub fn load(path: Option<&Path>) -> Result<Self> {
917        let mut figment = figment::Figment::from(Serialized::defaults(Config::default()));
918        if let Some(path) = path {
919            figment = figment.merge(Yaml::file(path));
920        }
921        let config = figment
922            .merge(Env::prefixed(ENV_PREFIX).split("__"))
923            .extract()?;
924
925        Ok(config)
926    }
927}
928
929#[cfg(test)]
930mod tests {
931    use std::io::Write;
932
933    use secrecy::ExposeSecret;
934
935    use crate::killswitches::Killswitch;
936    use crate::rate_limits::{BandwidthLimits, RateLimits, ThroughputLimits, ThroughputRule};
937
938    use super::*;
939
940    #[test]
941    fn configurable_via_env() {
942        figment::Jail::expect_with(|jail| {
943            jail.set_env("OS__LONG_TERM_STORAGE__TYPE", "s3compatible");
944            jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:8888");
945            jail.set_env("OS__LONG_TERM_STORAGE__BUCKET", "whatever");
946            jail.set_env("OS__METRICS__TAGS__FOO", "bar");
947            jail.set_env("OS__METRICS__TAGS__BAZ", "qux");
948            jail.set_env("OS__SENTRY__DSN", "abcde");
949            jail.set_env("OS__SENTRY__SAMPLE_RATE", "0.5");
950            jail.set_env("OS__SENTRY__ENVIRONMENT", "production");
951            jail.set_env("OS__SENTRY__SERVER_NAME", "objectstore-deadbeef");
952            jail.set_env("OS__SENTRY__TRACES_SAMPLE_RATE", "0.5");
953            jail.set_env("OS__SENTRY__TRACES_SAMPLE_RATE", "0.5");
954
955            let config = Config::load(None).unwrap();
956
957            let Storage::S3Compatible { endpoint, bucket } = &dbg!(&config).long_term_storage
958            else {
959                panic!("expected s3 storage");
960            };
961            assert_eq!(endpoint, "http://localhost:8888");
962            assert_eq!(bucket, "whatever");
963            assert_eq!(
964                config.metrics.tags,
965                [("foo".into(), "bar".into()), ("baz".into(), "qux".into())].into()
966            );
967
968            assert_eq!(config.sentry.dsn.unwrap().expose_secret().as_str(), "abcde");
969            assert_eq!(config.sentry.environment.as_deref(), Some("production"));
970            assert_eq!(
971                config.sentry.server_name.as_deref(),
972                Some("objectstore-deadbeef")
973            );
974            assert_eq!(config.sentry.sample_rate, 0.5);
975            assert_eq!(config.sentry.traces_sample_rate, 0.5);
976
977            Ok(())
978        });
979    }
980
981    #[test]
982    fn configurable_via_yaml() {
983        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
984        tempfile
985            .write_all(
986                br#"
987            long_term_storage:
988                type: s3compatible
989                endpoint: http://localhost:8888
990                bucket: whatever
991            sentry:
992                dsn: abcde
993                environment: production
994                server_name: objectstore-deadbeef
995                sample_rate: 0.5
996                traces_sample_rate: 0.5
997            "#,
998            )
999            .unwrap();
1000
1001        figment::Jail::expect_with(|_jail| {
1002            let config = Config::load(Some(tempfile.path())).unwrap();
1003
1004            let Storage::S3Compatible { endpoint, bucket } = &dbg!(&config).long_term_storage
1005            else {
1006                panic!("expected s3 storage");
1007            };
1008            assert_eq!(endpoint, "http://localhost:8888");
1009            assert_eq!(bucket, "whatever");
1010
1011            assert_eq!(config.sentry.dsn.unwrap().expose_secret().as_str(), "abcde");
1012            assert_eq!(config.sentry.environment.as_deref(), Some("production"));
1013            assert_eq!(
1014                config.sentry.server_name.as_deref(),
1015                Some("objectstore-deadbeef")
1016            );
1017            assert_eq!(config.sentry.sample_rate, 0.5);
1018            assert_eq!(config.sentry.traces_sample_rate, 0.5);
1019
1020            Ok(())
1021        });
1022    }
1023
1024    #[test]
1025    fn configured_with_env_and_yaml() {
1026        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
1027        tempfile
1028            .write_all(
1029                br#"
1030            long_term_storage:
1031                type: s3compatible
1032                endpoint: http://localhost:8888
1033                bucket: whatever
1034            "#,
1035            )
1036            .unwrap();
1037
1038        figment::Jail::expect_with(|jail| {
1039            jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:9001");
1040
1041            let config = Config::load(Some(tempfile.path())).unwrap();
1042
1043            let Storage::S3Compatible {
1044                endpoint,
1045                bucket: _bucket,
1046            } = &dbg!(&config).long_term_storage
1047            else {
1048                panic!("expected s3 storage");
1049            };
1050            // Env should overwrite the yaml config
1051            assert_eq!(endpoint, "http://localhost:9001");
1052
1053            Ok(())
1054        });
1055    }
1056
1057    #[test]
1058    fn configure_auth_with_env() {
1059        figment::Jail::expect_with(|jail| {
1060            jail.set_env("OS__AUTH__ENFORCE", "true");
1061            jail.set_env(
1062                "OS__AUTH__KEYS",
1063                r#"{kid1={key_files=["abcde","fghij","this is a test\n  multiline string\nend of string\n"],max_permissions=["object.read", "object.write"],}, kid2={key_files=["12345"],}}"#,
1064            );
1065
1066            let config = Config::load(None).unwrap();
1067
1068            assert!(config.auth.enforce);
1069
1070            let kid1 = config.auth.keys.get("kid1").unwrap();
1071            assert_eq!(kid1.key_files[0], Path::new("abcde"));
1072            assert_eq!(kid1.key_files[1], Path::new("fghij"));
1073            assert_eq!(
1074                kid1.key_files[2],
1075                Path::new("this is a test\n  multiline string\nend of string\n"),
1076            );
1077            assert_eq!(
1078                kid1.max_permissions,
1079                HashSet::from([Permission::ObjectRead, Permission::ObjectWrite])
1080            );
1081
1082            let kid2 = config.auth.keys.get("kid2").unwrap();
1083            assert_eq!(kid2.key_files[0], Path::new("12345"));
1084            assert_eq!(kid2.max_permissions, HashSet::new());
1085
1086            Ok(())
1087        });
1088    }
1089
1090    #[test]
1091    fn configure_auth_with_yaml() {
1092        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
1093        tempfile
1094            .write_all(
1095                br#"
1096                auth:
1097                    enforce: true
1098                    keys:
1099                        kid1:
1100                            key_files:
1101                                - "abcde"
1102                                - "fghij"
1103                                - |
1104                                  this is a test
1105                                    multiline string
1106                                  end of string
1107                            max_permissions:
1108                                - "object.read"
1109                                - "object.write"
1110                        kid2:
1111                            key_files:
1112                                - "12345"
1113            "#,
1114            )
1115            .unwrap();
1116
1117        figment::Jail::expect_with(|_jail| {
1118            let config = Config::load(Some(tempfile.path())).unwrap();
1119
1120            assert!(config.auth.enforce);
1121
1122            let kid1 = config.auth.keys.get("kid1").unwrap();
1123            assert_eq!(kid1.key_files[0], Path::new("abcde"));
1124            assert_eq!(kid1.key_files[1], Path::new("fghij"));
1125            assert_eq!(
1126                kid1.key_files[2],
1127                Path::new("this is a test\n  multiline string\nend of string\n")
1128            );
1129            assert_eq!(
1130                kid1.max_permissions,
1131                HashSet::from([Permission::ObjectRead, Permission::ObjectWrite])
1132            );
1133
1134            let kid2 = config.auth.keys.get("kid2").unwrap();
1135            assert_eq!(kid2.key_files[0], Path::new("12345"));
1136            assert_eq!(kid2.max_permissions, HashSet::new());
1137
1138            Ok(())
1139        });
1140    }
1141
1142    #[test]
1143    fn configure_killswitches_with_yaml() {
1144        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
1145        tempfile
1146            .write_all(
1147                br#"
1148                killswitches:
1149                  - usecase: broken_usecase
1150                  - scopes:
1151                      org: "42"
1152                  - scopes:
1153                      org: "42"
1154                      project: "4711"
1155                  - usecase: attachments
1156                    scopes:
1157                      org: "42"
1158                "#,
1159            )
1160            .unwrap();
1161
1162        figment::Jail::expect_with(|_jail| {
1163            let expected = [
1164                Killswitch {
1165                    usecase: Some("broken_usecase".into()),
1166                    scopes: BTreeMap::new(),
1167                },
1168                Killswitch {
1169                    usecase: None,
1170                    scopes: BTreeMap::from([("org".into(), "42".into())]),
1171                },
1172                Killswitch {
1173                    usecase: None,
1174                    scopes: BTreeMap::from([
1175                        ("org".into(), "42".into()),
1176                        ("project".into(), "4711".into()),
1177                    ]),
1178                },
1179                Killswitch {
1180                    usecase: Some("attachments".into()),
1181                    scopes: BTreeMap::from([("org".into(), "42".into())]),
1182                },
1183            ];
1184
1185            let config = Config::load(Some(tempfile.path())).unwrap();
1186            assert_eq!(&config.killswitches.0, &expected,);
1187
1188            Ok(())
1189        });
1190    }
1191
1192    #[test]
1193    fn configure_rate_limits_with_yaml() {
1194        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
1195        tempfile
1196            .write_all(
1197                br#"
1198                rate_limits:
1199                  throughput:
1200                    global_rps: 1000
1201                    burst: 100
1202                    usecase_pct: 50
1203                    scope_pct: 25
1204                    rules:
1205                      - usecase: "high_priority"
1206                        scopes:
1207                          - ["org", "123"]
1208                        rps: 500
1209                      - scopes:
1210                          - ["org", "456"]
1211                          - ["project", "789"]
1212                        pct: 10
1213                "#,
1214            )
1215            .unwrap();
1216
1217        figment::Jail::expect_with(|_jail| {
1218            let expected = RateLimits {
1219                throughput: ThroughputLimits {
1220                    global_rps: Some(1000),
1221                    burst: 100,
1222                    usecase_pct: Some(50),
1223                    scope_pct: Some(25),
1224                    rules: vec![
1225                        ThroughputRule {
1226                            usecase: Some("high_priority".to_string()),
1227                            scopes: vec![("org".to_string(), "123".to_string())],
1228                            rps: Some(500),
1229                            pct: None,
1230                        },
1231                        ThroughputRule {
1232                            usecase: None,
1233                            scopes: vec![
1234                                ("org".to_string(), "456".to_string()),
1235                                ("project".to_string(), "789".to_string()),
1236                            ],
1237                            rps: None,
1238                            pct: Some(10),
1239                        },
1240                    ],
1241                },
1242                bandwidth: BandwidthLimits::default(),
1243            };
1244
1245            let config = Config::load(Some(tempfile.path())).unwrap();
1246            assert_eq!(config.rate_limits, expected);
1247
1248            Ok(())
1249        });
1250    }
1251}