objectstore_server/
config.rs

1//! Configuration for the objectstore server.
2//!
3//! This module provides the configuration system for the objectstore HTTP server. Configuration can
4//! be loaded from multiple sources with the following precedence (highest to lowest):
5//!
6//! 1. Environment variables (prefixed with `OS__`)
7//! 2. YAML configuration file (specified via `-c` or `--config` flag)
8//! 3. Defaults
9//!
10//! See [`Config`] for a description of all configuration fields and their defaults.
11//!
12//! # Environment Variables
13//!
14//! Environment variables use `OS__` as a prefix and double underscores (`__`) to denote nested
15//! configuration structures. For example:
16//!
17//! - `OS__HTTP_ADDR=0.0.0.0:8888` sets the HTTP server address
18//! - `OS__LONG_TERM_STORAGE__TYPE=filesystem` sets the storage type
19//! - `OS__LONG_TERM_STORAGE__PATH=/data` sets the directory name
20//!
21//! # YAML Configuration File
22//!
23//! Configuration can also be provided via a YAML file. The above configuration in YAML format would
24//! look like this:
25//!
26//! ```yaml
27//! http_addr: 0.0.0.0:8888
28//!
29//! long_term_storage:
30//!   type: filesystem
31//!   path: /data
32//! ```
33
34use std::borrow::Cow;
35use std::collections::{BTreeMap, HashSet};
36use std::fmt;
37use std::net::SocketAddr;
38use std::path::{Path, PathBuf};
39use std::time::Duration;
40
41use anyhow::Result;
42use figment::providers::{Env, Format, Serialized, Yaml};
43use objectstore_types::Permission;
44use secrecy::{CloneableSecret, SecretBox, SerializableSecret, zeroize::Zeroize};
45use serde::{Deserialize, Serialize};
46use tracing::level_filters::LevelFilter;
47
48use crate::killswitches::Killswitches;
49
/// Environment variable prefix for all configuration options.
///
/// [`Config::load`] only reads environment variables that start with this prefix and splits the
/// remainder on `__` to address nested fields (for example `OS__SENTRY__DSN`).
const ENV_PREFIX: &str = "OS__";
52
/// Newtype around `String` that may protect against accidental
/// logging of secrets in our configuration struct. Use with
/// [`secrecy::SecretBox`].
///
/// The [`fmt::Debug`] implementation of this type prints a fixed placeholder instead of the
/// wrapped value. `Serialize` and `Deserialize` are derived, so serialization is not redacted.
#[derive(Clone, Default, Serialize, Deserialize, PartialEq)]
pub struct ConfigSecret(String);
58
59impl ConfigSecret {
60    pub fn as_str(&self) -> &str {
61        self.0.as_str()
62    }
63}
64
65impl From<&str> for ConfigSecret {
66    fn from(str: &str) -> Self {
67        ConfigSecret(str.to_string())
68    }
69}
70
71impl std::ops::Deref for ConfigSecret {
72    type Target = str;
73    fn deref(&self) -> &Self::Target {
74        &self.0
75    }
76}
77
78impl fmt::Debug for ConfigSecret {
79    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
80        write!(f, "[redacted]")
81    }
82}
83
84impl CloneableSecret for ConfigSecret {}
85impl SerializableSecret for ConfigSecret {}
86impl Zeroize for ConfigSecret {
87    fn zeroize(&mut self) {
88        self.0.zeroize();
89    }
90}
91
/// Storage backend configuration.
///
/// The `type` field in YAML or `__TYPE` in environment variables determines which variant is used.
/// Type names are the lowercased variant names: `filesystem`, `s3compatible`, `gcs`, and
/// `bigtable`.
///
/// Used in: [`Config::high_volume_storage`], [`Config::long_term_storage`]
#[derive(Debug, Deserialize, Serialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Storage {
    /// Local filesystem storage backend (type `"filesystem"`).
    ///
    /// Stores objects as files on the local filesystem. Suitable for development, testing,
    /// and single-server deployments.
    ///
    /// # Example
    ///
    /// ```yaml
    /// long_term_storage:
    ///   type: filesystem
    ///   path: /data
    /// ```
    FileSystem {
        /// Directory path for storing objects.
        ///
        /// The directory will be created if it doesn't exist. Relative paths are resolved from
        /// the server's working directory.
        ///
        /// # Default
        ///
        /// This field has no default of its own. The built-in configuration
        /// ([`Config::default`]) uses `data/high-volume` for high-volume storage and
        /// `data/long-term` for long-term storage.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=filesystem`
        /// - `OS__HIGH_VOLUME_STORAGE__PATH=/path/to/storage`
        ///
        /// Or for long-term storage:
        /// - `OS__LONG_TERM_STORAGE__TYPE=filesystem`
        /// - `OS__LONG_TERM_STORAGE__PATH=/path/to/storage`
        path: PathBuf,
    },

    /// S3-compatible storage backend (type `"s3compatible"`).
    ///
    /// Supports [Amazon S3] and other S3-compatible services. Authentication is handled via
    /// environment variables (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`) or IAM roles.
    ///
    /// [Amazon S3]: https://aws.amazon.com/s3/
    ///
    /// # Example
    ///
    /// ```yaml
    /// long_term_storage:
    ///   type: s3compatible
    ///   endpoint: https://s3.amazonaws.com
    ///   bucket: my-bucket
    /// ```
    S3Compatible {
        /// S3 endpoint URL.
        ///
        /// Examples: `https://s3.amazonaws.com`, `http://localhost:9000` (for MinIO)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=s3compatible`
        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=https://s3.amazonaws.com`
        ///
        /// Or for long-term storage:
        /// - `OS__LONG_TERM_STORAGE__TYPE=s3compatible`
        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=https://s3.amazonaws.com`
        endpoint: String,

        /// S3 bucket name.
        ///
        /// The bucket must exist before starting the server.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-bucket`
        /// - `OS__LONG_TERM_STORAGE__BUCKET=my-bucket`
        bucket: String,
    },

    /// [Google Cloud Storage] backend (type `"gcs"`).
    ///
    /// Stores objects in Google Cloud Storage (GCS). Authentication uses Application Default
    /// Credentials (ADC), which can be provided via the `GOOGLE_APPLICATION_CREDENTIALS`
    /// environment variable or GCE/GKE metadata service.
    ///
    /// **Note**: The bucket must be pre-created with the following lifecycle policy:
    /// - `daysSinceCustomTime`: 1 day
    /// - `action`: delete
    ///
    /// [Google Cloud Storage]: https://cloud.google.com/storage
    ///
    /// # Example
    ///
    /// ```yaml
    /// long_term_storage:
    ///   type: gcs
    ///   bucket: objectstore-bucket
    /// ```
    Gcs {
        /// Optional custom GCS endpoint URL.
        ///
        /// Useful for testing with emulators. If `None`, uses the default GCS endpoint.
        ///
        /// # Default
        ///
        /// `None` (uses default GCS endpoint)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=gcs`
        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=http://localhost:9000` (optional)
        ///
        /// Or for long-term storage:
        /// - `OS__LONG_TERM_STORAGE__TYPE=gcs`
        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=http://localhost:9000` (optional)
        endpoint: Option<String>,

        /// GCS bucket name.
        ///
        /// The bucket must exist before starting the server.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-gcs-bucket`
        /// - `OS__LONG_TERM_STORAGE__BUCKET=my-gcs-bucket`
        bucket: String,
    },

    /// [Google Bigtable] backend (type `"bigtable"`).
    ///
    /// Stores objects in Google Cloud Bigtable, a NoSQL wide-column database. This backend is
    /// optimized for high-throughput, low-latency workloads with small objects. Authentication uses
    /// Application Default Credentials (ADC).
    ///
    /// **Note**: The table must be pre-created with appropriate column families. Ensure to have the
    /// following column families:
    /// - `fg`: timestamp-based garbage collection (`maxage=1s`)
    /// - `fm`: manual garbage collection (no GC policy)
    ///
    /// [Google Bigtable]: https://cloud.google.com/bigtable
    ///
    /// # Example
    ///
    /// ```yaml
    /// high_volume_storage:
    ///   type: bigtable
    ///   project_id: my-project
    ///   instance_name: objectstore
    ///   table_name: objectstore
    /// ```
    BigTable {
        /// Optional custom Bigtable endpoint.
        ///
        /// Useful for testing with emulators. If `None`, uses the default Bigtable endpoint.
        ///
        /// # Default
        ///
        /// `None` (uses default Bigtable endpoint)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=bigtable`
        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=localhost:8086` (optional)
        ///
        /// Or for long-term storage:
        /// - `OS__LONG_TERM_STORAGE__TYPE=bigtable`
        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=localhost:8086` (optional)
        endpoint: Option<String>,

        /// GCP project ID.
        ///
        /// The Google project ID (not project number) containing the Bigtable instance.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__PROJECT_ID=my-project`
        /// - `OS__LONG_TERM_STORAGE__PROJECT_ID=my-project`
        project_id: String,

        /// Bigtable instance name.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__INSTANCE_NAME=my-instance`
        /// - `OS__LONG_TERM_STORAGE__INSTANCE_NAME=my-instance`
        instance_name: String,

        /// Bigtable table name.
        ///
        /// The table must exist before starting the server.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TABLE_NAME=objectstore`
        /// - `OS__LONG_TERM_STORAGE__TABLE_NAME=objectstore`
        table_name: String,

        /// Optional number of connections to maintain to Bigtable.
        ///
        /// # Default
        ///
        /// `None` (infers connection count based on CPU count)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__CONNECTIONS=16` (optional)
        /// - `OS__LONG_TERM_STORAGE__CONNECTIONS=16` (optional)
        connections: Option<usize>,
    },
}
305
/// Runtime configuration for the Tokio async runtime.
///
/// Controls the threading behavior of the server's async runtime. Fields that are not specified
/// fall back to the values of the [`Default`] implementation (`#[serde(default)]`).
///
/// Used in: [`Config::runtime`]
#[derive(Debug, Deserialize, Serialize)]
#[serde(default)]
pub struct Runtime {
    /// Number of worker threads for the server runtime.
    ///
    /// This controls the size of the Tokio thread pool used to execute async tasks. More threads
    /// can improve concurrency for CPU-bound workloads, but too many threads can increase context
    /// switching overhead.
    ///
    /// Set this in accordance with the resources available to the server, especially in Kubernetes
    /// environments.
    ///
    /// # Default
    ///
    /// Defaults to the number of CPU cores on the host machine.
    ///
    /// # Environment Variable
    ///
    /// `OS__RUNTIME__WORKER_THREADS`
    ///
    /// # Considerations
    ///
    /// - For I/O-bound workloads, the default (number of CPU cores) is usually sufficient
    /// - For CPU-intensive workloads, consider matching or exceeding the number of cores
    /// - Setting this too high can lead to increased memory usage and context switching
    pub worker_threads: usize,

    /// Interval for reporting internal runtime metrics.
    ///
    /// The value is (de)serialized with `humantime_serde`, i.e. as a human-readable duration
    /// string with a unit (for example `10s`) rather than a bare number of seconds.
    ///
    /// # Default
    ///
    /// `10s` (10 seconds)
    ///
    /// # Environment Variable
    ///
    /// `OS__RUNTIME__METRICS_INTERVAL`
    #[serde(with = "humantime_serde")]
    pub metrics_interval: Duration,
}
344
345impl Default for Runtime {
346    fn default() -> Self {
347        Self {
348            worker_threads: num_cpus::get(),
349            metrics_interval: Duration::from_secs(10),
350        }
351    }
352}
353
/// [Sentry](https://sentry.io/) error tracking and performance monitoring configuration.
///
/// Configures integration with Sentry for error tracking, performance monitoring, and distributed
/// tracing. Sentry is disabled by default and only enabled when a DSN is provided.
///
/// The defaults listed on each field are the values of the [`Default`] implementation.
///
/// Used in: [`Config::sentry`]
#[derive(Debug, Deserialize, Serialize)]
pub struct Sentry {
    /// Sentry DSN (Data Source Name).
    ///
    /// When set, enables Sentry error tracking and performance monitoring. When `None`, Sentry
    /// integration is completely disabled.
    ///
    /// The value is wrapped in a [`SecretBox`] of [`ConfigSecret`], whose `Debug` output is
    /// redacted.
    ///
    /// # Default
    ///
    /// `None` (Sentry disabled)
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__DSN`
    pub dsn: Option<SecretBox<ConfigSecret>>,

    /// Environment name for this deployment.
    ///
    /// Used to distinguish events from different environments (e.g., "production", "staging",
    /// "development"). This appears in the Sentry UI and can be used for filtering.
    ///
    /// # Default
    ///
    /// `None`
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__ENVIRONMENT`
    pub environment: Option<Cow<'static, str>>,

    /// Server name or identifier.
    ///
    /// Used to identify which server instance sent an event. Useful in multi-server deployments for
    /// tracking which instance encountered an error. Set to the hostname or pod name of the server.
    ///
    /// # Default
    ///
    /// `None`
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__SERVER_NAME`
    pub server_name: Option<Cow<'static, str>>,

    /// Error event sampling rate.
    ///
    /// Controls what percentage of error events are sent to Sentry. A value of `1.0` sends all
    /// errors, while `0.5` sends 50% of errors, and `0.0` sends no errors.
    ///
    /// # Default
    ///
    /// `1.0` (send all errors)
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__SAMPLE_RATE`
    pub sample_rate: f32,

    /// Performance trace sampling rate.
    ///
    /// Controls what percentage of transactions (traces) are sent to Sentry for performance
    /// monitoring. A value of `1.0` sends all traces, while `0.01` sends 1% of traces.
    ///
    /// **Important**: Performance traces can generate significant data volume in high-traffic
    /// systems. Start with a low rate (0.01-0.1) and adjust based on traffic and Sentry quota.
    ///
    /// # Default
    ///
    /// `0.01` (send 1% of traces)
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__TRACES_SAMPLE_RATE`
    pub traces_sample_rate: f32,

    /// Whether to inherit sampling decisions from incoming traces.
    ///
    /// When `true` (default), if an incoming request contains a distributed tracing header with a
    /// sampling decision (e.g., from an upstream service), that decision is honored. When `false`,
    /// the local `traces_sample_rate` is always used instead.
    ///
    /// When this is enabled, the calling service effectively controls the sampling decision for the
    /// entire trace. Set this to `false` if you want to have independent sampling control at the
    /// objectstore level.
    ///
    /// # Default
    ///
    /// `true`
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__INHERIT_SAMPLING_DECISION`
    pub inherit_sampling_decision: bool,

    /// Enable Sentry SDK debug mode.
    ///
    /// When enabled, the Sentry SDK will output debug information to stderr, which can be useful
    /// for troubleshooting Sentry integration issues. It is discouraged to enable this in
    /// production as it generates verbose logging.
    ///
    /// # Default
    ///
    /// `false`
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__DEBUG`
    pub debug: bool,

    /// Additional tags to attach to all Sentry events.
    ///
    /// Key-value pairs that are sent as tags with every event reported to Sentry. Useful for adding
    /// context such as deployment identifiers or environment details.
    ///
    /// # Default
    ///
    /// Empty (no tags)
    ///
    /// # Environment Variables
    ///
    /// Each tag is set individually:
    /// - `OS__SENTRY__TAGS__FOO=foo`
    /// - `OS__SENTRY__TAGS__BAR=bar`
    ///
    /// # YAML Example
    ///
    /// ```yaml
    /// sentry:
    ///   tags:
    ///     foo: foo
    ///     bar: bar
    /// ```
    pub tags: BTreeMap<String, String>,
}
494
495impl Sentry {
496    /// Returns whether Sentry integration is enabled.
497    ///
498    /// Sentry is considered enabled if a DSN is configured.
499    pub fn is_enabled(&self) -> bool {
500        self.dsn.is_some()
501    }
502}
503
504impl Default for Sentry {
505    fn default() -> Self {
506        Self {
507            dsn: None,
508            environment: None,
509            server_name: None,
510            sample_rate: 1.0,
511            traces_sample_rate: 0.01,
512            inherit_sampling_decision: true,
513            debug: false,
514            tags: BTreeMap::new(),
515        }
516    }
517}
518
/// Log output format.
///
/// Controls how log messages are formatted. The format can be explicitly specified or
/// auto-detected based on whether output is to a TTY.
///
/// When (de)serialized, variants use their lowercase names: `auto`, `pretty`, `simplified`, and
/// `json`.
///
/// Used in: [`Logging::format`]
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum LogFormat {
    /// Auto detect the best format.
    ///
    /// This chooses [`LogFormat::Pretty`] for TTY, otherwise [`LogFormat::Simplified`].
    Auto,

    /// Pretty printing with colors.
    ///
    /// ```text
    ///  INFO  objectstore::http > objectstore starting
    /// ```
    Pretty,

    /// Simplified plain text output.
    ///
    /// ```text
    /// 2020-12-04T12:10:32Z [objectstore::http] INFO: objectstore starting
    /// ```
    Simplified,

    /// Dump out JSON lines.
    ///
    /// ```text
    /// {"timestamp":"2020-12-04T12:11:08.729716Z","level":"INFO","logger":"objectstore::http","message":"objectstore starting","module_path":"objectstore::http","filename":"objectstore_service/src/http.rs","lineno":31}
    /// ```
    Json,
}
552
/// Error returned when a string does not name a known log format.
///
/// Wraps the rejected input so that it can be echoed in the error message.
#[derive(Clone, Debug)]
pub struct FormatParseError(String);

impl fmt::Display for FormatParseError {
    /// Writes the rejected input followed by the list of accepted format names.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(input) = self;
        write!(
            f,
            r#"error parsing "{}" as format: expected one of "auto", "pretty", "simplified", "json""#,
            input
        )
    }
}
566
567impl std::str::FromStr for LogFormat {
568    type Err = FormatParseError;
569
570    fn from_str(s: &str) -> Result<Self, Self::Err> {
571        let result = match s {
572            "" => LogFormat::Auto,
573            s if s.eq_ignore_ascii_case("auto") => LogFormat::Auto,
574            s if s.eq_ignore_ascii_case("pretty") => LogFormat::Pretty,
575            s if s.eq_ignore_ascii_case("simplified") => LogFormat::Simplified,
576            s if s.eq_ignore_ascii_case("json") => LogFormat::Json,
577            s => return Err(FormatParseError(s.into())),
578        };
579
580        Ok(result)
581    }
582}
583
// `FormatParseError` only carries the rejected input string, so the provided `Error` methods are
// used unchanged.
impl std::error::Error for FormatParseError {}
585
586mod display_fromstr {
587    pub fn serialize<T, S>(value: &T, serializer: S) -> Result<S::Ok, S::Error>
588    where
589        S: serde::Serializer,
590        T: std::fmt::Display,
591    {
592        serializer.collect_str(&value)
593    }
594
595    pub fn deserialize<'de, T, D>(deserializer: D) -> Result<T, D::Error>
596    where
597        D: serde::Deserializer<'de>,
598        T: std::str::FromStr,
599        <T as std::str::FromStr>::Err: std::fmt::Display,
600    {
601        use serde::Deserialize;
602        let s = <std::borrow::Cow<'de, str>>::deserialize(deserializer)?;
603        s.parse().map_err(serde::de::Error::custom)
604    }
605}
606
/// Logging configuration.
///
/// Controls the verbosity and format of log output. Logs are always written to stderr.
///
/// Used in: [`Config::logging`]
#[derive(Debug, Deserialize, Serialize)]
pub struct Logging {
    /// Minimum log level to output.
    ///
    /// Controls which log messages are emitted based on their severity. Messages at or above this
    /// level will be output. Valid levels in increasing severity: TRACE, DEBUG, INFO, WARN, ERROR,
    /// OFF.
    ///
    /// The value is read as a string and parsed with the `FromStr` implementation of
    /// [`LevelFilter`]; it is written back using its `Display` implementation.
    ///
    /// The `RUST_LOG` environment variable provides more granular control per module if needed.
    ///
    /// # Default
    ///
    /// `INFO`
    ///
    /// # Environment Variable
    ///
    /// `OS__LOGGING__LEVEL`
    ///
    /// # Considerations
    ///
    /// - `TRACE` and `DEBUG` can be very verbose and impact performance; use only for debugging
    /// - `INFO` is appropriate for production
    /// - `WARN` or `ERROR` can be used to reduce log volume in high-traffic systems
    #[serde(with = "display_fromstr")]
    pub level: LevelFilter,

    /// Log output format.
    ///
    /// Determines how log messages are formatted. See [`LogFormat`] for available options and
    /// examples.
    ///
    /// # Default
    ///
    /// `Auto` (pretty for TTY, simplified otherwise)
    ///
    /// # Environment Variable
    ///
    /// `OS__LOGGING__FORMAT`
    pub format: LogFormat,
}
656
657impl Default for Logging {
658    fn default() -> Self {
659        Self {
660            level: LevelFilter::INFO,
661            format: LogFormat::Auto,
662        }
663    }
664}
665
/// Metrics configuration.
///
/// Configures submission of internal metrics to Datadog. The derived [`Default`] leaves the API
/// key unset and the tag map empty.
///
/// Used in: [`Config::metrics`]
#[derive(Debug, Default, Deserialize, Serialize)]
pub struct Metrics {
    /// Datadog [API key] for metrics.
    ///
    /// When provided, enables metrics reporting to Datadog. Metrics include request counts,
    /// latencies, storage operations, and more. The key is kept secret and redacted from logs.
    ///
    /// # Default
    ///
    /// `None` (Datadog metrics disabled)
    ///
    /// # Environment Variable
    ///
    /// `OS__METRICS__DATADOG_KEY`
    ///
    /// [API key]: https://docs.datadoghq.com/account_management/api-app-keys/#api-keys
    pub datadog_key: Option<SecretBox<ConfigSecret>>,

    /// Global tags applied to all metrics.
    ///
    /// Key-value pairs that are attached to every metric sent to Datadog. Useful for
    /// identifying the environment, region, or other deployment-specific information.
    ///
    /// # Default
    ///
    /// Empty (no tags)
    ///
    /// # Environment Variables
    ///
    /// Each tag is set individually:
    /// - `OS__METRICS__TAGS__FOO=foo`
    /// - `OS__METRICS__TAGS__BAR=bar`
    ///
    /// # YAML Example
    ///
    /// ```yaml
    /// metrics:
    ///   tags:
    ///     foo: foo
    ///     bar: bar
    /// ```
    pub tags: BTreeMap<String, String>,
}
712
/// A key that may be used to verify a request's `Authorization` header and its
/// associated permissions. May contain multiple key versions to facilitate rotation.
///
/// Used in: [`AuthZ::keys`]
#[derive(Debug, Deserialize, Serialize)]
pub struct AuthZVerificationKey {
    /// Files that contain versions of this key's key material which may be used to verify
    /// signatures.
    ///
    /// If a key is being rotated, the old and new versions of that key should both be
    /// configured so objectstore can verify signatures while the updated key is still
    /// rolling out. Otherwise, this should only contain the most recent version of a key.
    ///
    /// This field is required; it has no default.
    pub key_files: Vec<PathBuf>,

    /// The maximum set of permissions that this key's signer is authorized to grant.
    ///
    /// If a request's `Authorization` header grants full permission but it was signed by
    /// a key that is only allowed to grant read permission, then the request only has
    /// read permission.
    ///
    /// # Default
    ///
    /// Empty set when the field is omitted (`#[serde(default)]`).
    #[serde(default)]
    pub max_permissions: HashSet<Permission>,
}
733
/// Configuration for content-based authorization.
///
/// Used in: [`Config::auth`]
#[derive(Debug, Default, Deserialize, Serialize)]
pub struct AuthZ {
    /// Whether to enforce content-based authorization or not.
    ///
    /// If this is set to `false`, checks are still performed but failures will not result
    /// in `403 Forbidden` responses.
    ///
    /// NOTE(review): this text previously read `403 Unauthorized`, which mixes two status codes
    /// (403 is "Forbidden", 401 is "Unauthorized") — confirm which status the handler returns.
    ///
    /// # Default
    ///
    /// `false` (from the derived `Default`)
    pub enforce: bool,

    /// Keys that may be used to verify a request's `Authorization` header.
    ///
    /// This field is a container that is keyed on a key's ID. When verifying a JWT
    /// from the `Authorization` header, the `kid` field should be read from the JWT
    /// header and used to index into this map to select the appropriate key.
    ///
    /// # Default
    ///
    /// Empty map when the field is omitted (`#[serde(default)]`).
    #[serde(default)]
    pub keys: BTreeMap<String, AuthZVerificationKey>,
}
751
/// Main configuration struct for the objectstore server.
///
/// This is the top-level configuration that combines all server settings including networking,
/// storage backends, runtime, and observability options.
///
/// Configuration is loaded with the following precedence (highest to lowest):
/// 1. Environment variables (prefixed with `OS__`)
/// 2. YAML configuration file (if provided via `-c` flag)
/// 3. Default values
///
/// See individual field documentation for details on each configuration option, including
/// defaults and environment variables. Use [`Config::load`] to build a value from these sources.
#[derive(Debug, Deserialize, Serialize)]
pub struct Config {
    /// HTTP server bind address.
    ///
    /// The socket address (IP and port) where the HTTP server will listen for incoming
    /// connections. Supports both IPv4 and IPv6 addresses. Note that binding to `0.0.0.0`
    /// makes the server accessible from all network interfaces.
    ///
    /// # Default
    ///
    /// `0.0.0.0:8888` (listens on all network interfaces, port 8888)
    ///
    /// # Environment Variable
    ///
    /// `OS__HTTP_ADDR`
    pub http_addr: SocketAddr,

    /// Storage backend for high-volume, small objects.
    ///
    /// This backend is used for smaller objects in scenarios where high-throughput, low-latency
    /// access with many small objects is desired. Good candidates include Bigtable, local
    /// filesystem (for development), or fast SSDs. Can be set to the same backend as
    /// `long_term_storage` for simplicity.
    ///
    /// **Note**: Currently, objects up to 1 MiB are stored in this backend, while larger objects
    /// are stored in the [`long_term_storage`](`Config::long_term_storage`). This is subject to
    /// change in the future and more configuration options will be added to influence this
    /// decision.
    ///
    /// # Default
    ///
    /// Filesystem storage in `./data/high-volume` directory
    ///
    /// # Environment Variables
    ///
    /// - `OS__HIGH_VOLUME_STORAGE__TYPE` - Backend type (filesystem, s3compatible, gcs, bigtable)
    /// - Additional fields depending on the type (see [`Storage`])
    ///
    /// # Example
    ///
    /// ```yaml
    /// high_volume_storage:
    ///   type: bigtable
    ///   project_id: my-project
    ///   instance_name: objectstore
    ///   table_name: objectstore
    /// ```
    pub high_volume_storage: Storage,

    /// Storage backend for large objects with long-term retention.
    ///
    /// This backend is used for larger objects in scenarios with lower throughput and higher
    /// latency requirements. Good candidates include S3, Google Cloud Storage, or other object
    /// storage systems. Can be set to the same backend as `high_volume_storage` for simplicity.
    ///
    /// **Note**: Currently, objects over 1 MiB are stored in this backend, while smaller objects
    /// are stored in the [`high_volume_storage`](`Config::high_volume_storage`). This is subject to
    /// change in the future and more configuration options will be added to influence this
    /// decision.
    ///
    /// # Default
    ///
    /// Filesystem storage in `./data/long-term` directory
    ///
    /// # Environment Variables
    ///
    /// - `OS__LONG_TERM_STORAGE__TYPE` - Backend type (filesystem, s3compatible, gcs, bigtable)
    /// - Additional fields depending on the type (see [`Storage`])
    ///
    /// # Example
    ///
    /// ```yaml
    /// long_term_storage:
    ///   type: gcs
    ///   bucket: my-objectstore-bucket
    /// ```
    pub long_term_storage: Storage,

    /// Configuration of the internal task runtime.
    ///
    /// Controls the thread pool size and behavior of the async runtime powering the server.
    /// See [`Runtime`] for configuration options.
    pub runtime: Runtime,

    /// Logging configuration.
    ///
    /// Controls log verbosity and output format. See [`Logging`] for configuration options.
    pub logging: Logging,

    /// Sentry error tracking configuration.
    ///
    /// Optional integration with Sentry for error tracking and performance monitoring.
    /// See [`Sentry`] for configuration options.
    pub sentry: Sentry,

    /// Internal metrics configuration.
    ///
    /// Optional configuration for submitting internal metrics to Datadog. See [`Metrics`] for
    /// configuration options.
    pub metrics: Metrics,

    /// Content-based authorization configuration.
    ///
    /// Controls the verification and enforcement of content-based access control based on the
    /// JWT in a request's `Authorization` header. See [`AuthZ`] for configuration options.
    pub auth: AuthZ,

    /// A list of matchers for requests to discard without processing.
    ///
    /// See [`Killswitches`]. The default is `Killswitches::default()`.
    pub killswitches: Killswitches,
}
874
875impl Default for Config {
876    fn default() -> Self {
877        Self {
878            http_addr: "0.0.0.0:8888".parse().unwrap(),
879
880            high_volume_storage: Storage::FileSystem {
881                path: PathBuf::from("data/high-volume"),
882            },
883            long_term_storage: Storage::FileSystem {
884                path: PathBuf::from("data/long-term"),
885            },
886
887            runtime: Runtime::default(),
888            logging: Logging::default(),
889            sentry: Sentry::default(),
890            metrics: Metrics::default(),
891            auth: AuthZ::default(),
892            killswitches: Killswitches::default(),
893        }
894    }
895}
896
897impl Config {
898    /// Loads configuration from the provided arguments.
899    ///
900    /// Configuration is merged in the following order (later sources override earlier ones):
901    /// 1. Default values
902    /// 2. YAML configuration file (if provided in `args`)
903    /// 3. Environment variables (prefixed with `OS__`)
904    ///
905    /// # Errors
906    ///
907    /// Returns an error if:
908    /// - The YAML configuration file cannot be read or parsed
909    /// - Environment variables contain invalid values
910    /// - Required fields are missing or invalid
911    pub fn load(path: Option<&Path>) -> Result<Self> {
912        let mut figment = figment::Figment::from(Serialized::defaults(Config::default()));
913        if let Some(path) = path {
914            figment = figment.merge(Yaml::file(path));
915        }
916        let config = figment
917            .merge(Env::prefixed(ENV_PREFIX).split("__"))
918            .extract()?;
919
920        Ok(config)
921    }
922}
923
#[cfg(test)]
mod tests {
    use std::io::Write;

    use secrecy::ExposeSecret;

    use crate::killswitches::Killswitch;

    use super::*;

    /// Writes `contents` to a fresh temporary file that can be handed to [`Config::load`].
    ///
    /// Callers must keep the returned handle alive for as long as they use its path.
    fn write_yaml(contents: &[u8]) -> tempfile::NamedTempFile {
        let mut file = tempfile::NamedTempFile::new().unwrap();
        file.write_all(contents).unwrap();
        file
    }

    /// Asserts that `storage` is the S3-compatible backend with the given endpoint and bucket.
    #[track_caller]
    fn assert_s3_storage(storage: &Storage, expected_endpoint: &str, expected_bucket: &str) {
        let Storage::S3Compatible { endpoint, bucket } = storage else {
            panic!("expected s3 storage, got {storage:?}");
        };
        assert_eq!(endpoint, expected_endpoint);
        assert_eq!(bucket, expected_bucket);
    }

    /// Asserts the Sentry settings shared by the env and YAML tests.
    ///
    /// The two sample rates are expected to differ (`0.5` and `0.25`) so that a mix-up between
    /// `sample_rate` and `traces_sample_rate` is detected.
    #[track_caller]
    fn assert_sentry(sentry: &Sentry) {
        let dsn = sentry.dsn.as_ref().expect("sentry dsn should be set");
        assert_eq!(dsn.expose_secret().as_str(), "abcde");
        assert_eq!(sentry.environment.as_deref(), Some("production"));
        assert_eq!(sentry.server_name.as_deref(), Some("objectstore-deadbeef"));
        assert_eq!(sentry.sample_rate, 0.5);
        assert_eq!(sentry.traces_sample_rate, 0.25);
    }

    /// Asserts the auth settings shared by the env and YAML tests: enforcement is on, `kid1`
    /// has three key files and read/write permissions, and `kid2` has one key file and no
    /// permissions.
    #[track_caller]
    fn assert_auth(auth: &AuthZ) {
        assert!(auth.enforce);

        let kid1 = auth.keys.get("kid1").expect("kid1 should be configured");
        assert_eq!(kid1.key_files.len(), 3);
        assert_eq!(kid1.key_files[0], Path::new("abcde"));
        assert_eq!(kid1.key_files[1], Path::new("fghij"));
        assert_eq!(
            kid1.key_files[2],
            Path::new("this is a test\n  multiline string\nend of string\n"),
        );
        assert_eq!(
            kid1.max_permissions,
            HashSet::from([Permission::ObjectRead, Permission::ObjectWrite])
        );

        let kid2 = auth.keys.get("kid2").expect("kid2 should be configured");
        assert_eq!(kid2.key_files.len(), 1);
        assert_eq!(kid2.key_files[0], Path::new("12345"));
        assert_eq!(kid2.max_permissions, HashSet::new());
    }

    /// Nested settings can be supplied through `OS__`-prefixed environment variables alone.
    #[test]
    fn configurable_via_env() {
        figment::Jail::expect_with(|jail| {
            jail.set_env("OS__LONG_TERM_STORAGE__TYPE", "s3compatible");
            jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:8888");
            jail.set_env("OS__LONG_TERM_STORAGE__BUCKET", "whatever");
            jail.set_env("OS__METRICS__TAGS__FOO", "bar");
            jail.set_env("OS__METRICS__TAGS__BAZ", "qux");
            jail.set_env("OS__SENTRY__DSN", "abcde");
            jail.set_env("OS__SENTRY__SAMPLE_RATE", "0.5");
            jail.set_env("OS__SENTRY__ENVIRONMENT", "production");
            jail.set_env("OS__SENTRY__SERVER_NAME", "objectstore-deadbeef");
            // Deliberately different from `SAMPLE_RATE` so that swapped fields are detected.
            jail.set_env("OS__SENTRY__TRACES_SAMPLE_RATE", "0.25");

            let config = Config::load(None).unwrap();

            assert_s3_storage(
                &config.long_term_storage,
                "http://localhost:8888",
                "whatever",
            );
            assert_eq!(
                config.metrics.tags,
                [("foo".into(), "bar".into()), ("baz".into(), "qux".into())].into()
            );
            assert_sentry(&config.sentry);

            Ok(())
        });
    }

    /// The same settings can be supplied through a YAML file alone.
    #[test]
    fn configurable_via_yaml() {
        let config_file = write_yaml(
            br#"
            long_term_storage:
                type: s3compatible
                endpoint: http://localhost:8888
                bucket: whatever
            sentry:
                dsn: abcde
                environment: production
                server_name: objectstore-deadbeef
                sample_rate: 0.5
                traces_sample_rate: 0.25
            "#,
        );

        figment::Jail::expect_with(|_jail| {
            let config = Config::load(Some(config_file.path())).unwrap();

            assert_s3_storage(
                &config.long_term_storage,
                "http://localhost:8888",
                "whatever",
            );
            assert_sentry(&config.sentry);

            Ok(())
        });
    }

    /// Environment variables override individual YAML keys without discarding their siblings.
    #[test]
    fn configured_with_env_and_yaml() {
        let config_file = write_yaml(
            br#"
            long_term_storage:
                type: s3compatible
                endpoint: http://localhost:8888
                bucket: whatever
            "#,
        );

        figment::Jail::expect_with(|jail| {
            jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:9001");

            let config = Config::load(Some(config_file.path())).unwrap();

            // Env should overwrite the endpoint from the YAML config, while the bucket, which is
            // only set in YAML, must survive the merge.
            assert_s3_storage(
                &config.long_term_storage,
                "http://localhost:9001",
                "whatever",
            );

            Ok(())
        });
    }

    /// Auth keys can be supplied as an inline dictionary in a single environment variable.
    #[test]
    fn configure_auth_with_env() {
        figment::Jail::expect_with(|jail| {
            jail.set_env("OS__AUTH__ENFORCE", "true");
            jail.set_env(
                "OS__AUTH__KEYS",
                r#"{kid1={key_files=["abcde","fghij","this is a test\n  multiline string\nend of string\n"],max_permissions=["object.read", "object.write"],}, kid2={key_files=["12345"],}}"#,
            );

            let config = Config::load(None).unwrap();
            assert_auth(&config.auth);

            Ok(())
        });
    }

    /// Auth keys can be supplied through the YAML file, including block-scalar entries.
    #[test]
    fn configure_auth_with_yaml() {
        let config_file = write_yaml(
            br#"
                auth:
                    enforce: true
                    keys:
                        kid1:
                            key_files:
                                - "abcde"
                                - "fghij"
                                - |
                                  this is a test
                                    multiline string
                                  end of string
                            max_permissions:
                                - "object.read"
                                - "object.write"
                        kid2:
                            key_files:
                                - "12345"
            "#,
        );

        figment::Jail::expect_with(|_jail| {
            let config = Config::load(Some(config_file.path())).unwrap();
            assert_auth(&config.auth);

            Ok(())
        });
    }

    /// Killswitches are read from YAML in order, with `usecase` and `scopes` both optional.
    #[test]
    fn configure_killswitches_with_yaml() {
        let config_file = write_yaml(
            br#"
                killswitches:
                  - usecase: broken_usecase
                  - scopes:
                      org: "42"
                  - scopes:
                      org: "42"
                      project: "4711"
                  - usecase: attachments
                    scopes:
                      org: "42"
                "#,
        );

        figment::Jail::expect_with(|_jail| {
            let expected = [
                Killswitch {
                    usecase: Some("broken_usecase".into()),
                    scopes: BTreeMap::new(),
                },
                Killswitch {
                    usecase: None,
                    scopes: BTreeMap::from([("org".into(), "42".into())]),
                },
                Killswitch {
                    usecase: None,
                    scopes: BTreeMap::from([
                        ("org".into(), "42".into()),
                        ("project".into(), "4711".into()),
                    ]),
                },
                Killswitch {
                    usecase: Some("attachments".into()),
                    scopes: BTreeMap::from([("org".into(), "42".into())]),
                },
            ];

            let config = Config::load(Some(config_file.path())).unwrap();
            assert_eq!(&config.killswitches.0, &expected);

            Ok(())
        });
    }
}