objectstore_server/
config.rs

1//! Configuration for the objectstore server.
2//!
3//! This module provides the configuration system for the objectstore HTTP server. Configuration can
4//! be loaded from multiple sources with the following precedence (highest to lowest):
5//!
6//! 1. Environment variables (prefixed with `OS__`)
7//! 2. YAML configuration file (specified via `-c` or `--config` flag)
8//! 3. Defaults
9//!
10//! See [`Config`] for a description of all configuration fields and their defaults.
11//!
12//! # Environment Variables
13//!
14//! Environment variables use `OS__` as a prefix and double underscores (`__`) to denote nested
15//! configuration structures. For example:
16//!
17//! - `OS__HTTP_ADDR=0.0.0.0:8888` sets the HTTP server address
18//! - `OS__LONG_TERM_STORAGE__TYPE=filesystem` sets the storage type
19//! - `OS__LONG_TERM_STORAGE__PATH=/data` sets the directory name
20//!
21//! # YAML Configuration File
22//!
23//! Configuration can also be provided via a YAML file. The above configuration in YAML format would
24//! look like this:
25//!
26//! ```yaml
27//! http_addr: 0.0.0.0:8888
28//!
29//! long_term_storage:
30//!   type: filesystem
31//!   path: /data
32//! ```
33
34use std::borrow::Cow;
35use std::collections::{BTreeMap, HashSet};
36use std::fmt;
37use std::net::SocketAddr;
38use std::path::{Path, PathBuf};
39
40use anyhow::Result;
41use figment::providers::{Env, Format, Serialized, Yaml};
42use secrecy::{CloneableSecret, SecretBox, SerializableSecret, zeroize::Zeroize};
43use serde::{Deserialize, Serialize};
44use tracing::level_filters::LevelFilter;
45
46use crate::auth::Permission;
47
/// Prefix that marks an environment variable as an objectstore configuration option.
///
/// [`Config::load`] only reads environment variables that start with this prefix.
const ENV_PREFIX: &'static str = "OS__";
50
51/// Newtype around `String` that may protect against accidental
52/// logging of secrets in our configuration struct. Use with
53/// [`secrecy::SecretBox`].
54#[derive(Clone, Default, Serialize, Deserialize, PartialEq)]
55pub struct ConfigSecret(String);
56
57impl ConfigSecret {
58    pub fn as_str(&self) -> &str {
59        self.0.as_str()
60    }
61}
62
63impl std::ops::Deref for ConfigSecret {
64    type Target = str;
65    fn deref(&self) -> &Self::Target {
66        &self.0
67    }
68}
69
70impl fmt::Debug for ConfigSecret {
71    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
72        write!(f, "[redacted]")
73    }
74}
75
76impl CloneableSecret for ConfigSecret {}
77impl SerializableSecret for ConfigSecret {}
78impl Zeroize for ConfigSecret {
79    fn zeroize(&mut self) {
80        self.0.zeroize();
81    }
82}
83
84/// Storage backend configuration.
85///
86/// The `type` field in YAML or `__TYPE` in environment variables determines which variant is used.
87///
88/// Used in: [`Config::high_volume_storage`], [`Config::long_term_storage`]
89#[derive(Debug, Clone, Deserialize, Serialize)]
90#[serde(tag = "type", rename_all = "lowercase")]
91pub enum Storage {
92    /// Local filesystem storage backend (type `"filesystem"`).
93    ///
94    /// Stores objects as files on the local filesystem. Suitable for development, testing,
95    /// and single-server deployments.
96    ///
97    /// # Example
98    ///
99    /// ```yaml
100    /// long_term_storage:
101    ///   type: filesystem
102    ///   path: /data
103    /// ```
104    FileSystem {
105        /// Directory path for storing objects.
106        ///
107        /// The directory will be created if it doesn't exist. Relative paths are resolved from
108        /// the server's working directory.
109        ///
110        /// # Default
111        ///
112        /// `"data"` (relative to the server's working directory)
113        ///
114        /// # Environment Variables
115        ///
116        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=filesystem`
117        /// - `OS__HIGH_VOLUME_STORAGE__PATH=/path/to/storage`
118        ///
119        /// Or for long-term storage:
120        /// - `OS__LONG_TERM_STORAGE__TYPE=filesystem`
121        /// - `OS__LONG_TERM_STORAGE__PATH=/path/to/storage`
122        path: PathBuf,
123    },
124
125    /// S3-compatible storage backend (type `"s3compatible"`).
126    ///
127    /// Supports [Amazon S3] and other S3-compatible services. Authentication is handled via
128    /// environment variables (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`) or IAM roles.
129    ///
130    /// [Amazon S3]: https://aws.amazon.com/s3/
131    ///
132    /// # Example
133    ///
134    /// ```yaml
135    /// long_term_storage:
136    ///   type: s3compatible
137    ///   endpoint: https://s3.amazonaws.com
138    ///   bucket: my-bucket
139    /// ```
140    S3Compatible {
141        /// S3 endpoint URL.
142        ///
143        /// Examples: `https://s3.amazonaws.com`, `http://localhost:9000` (for MinIO)
144        ///
145        /// # Environment Variables
146        ///
147        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=s3compatible`
148        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=https://s3.amazonaws.com`
149        ///
150        /// Or for long-term storage:
151        /// - `OS__LONG_TERM_STORAGE__TYPE=s3compatible`
152        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=https://s3.amazonaws.com`
153        endpoint: String,
154
155        /// S3 bucket name.
156        ///
157        /// The bucket must exist before starting the server.
158        ///
159        /// # Environment Variables
160        ///
161        /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-bucket`
162        /// - `OS__LONG_TERM_STORAGE__BUCKET=my-bucket`
163        bucket: String,
164    },
165
166    /// [Google Cloud Storage] backend (type `"gcs"`).
167    ///
168    /// Stores objects in Google Cloud Storage (GCS). Authentication uses Application Default
169    /// Credentials (ADC), which can be provided via the `GOOGLE_APPLICATION_CREDENTIALS`
170    /// environment variable or GCE/GKE metadata service.
171    ///
172    /// **Note**: The bucket must be pre-created with the following lifecycle policy:
173    /// - `daysSinceCustomTime`: 1 day
174    /// - `action`: delete
175    ///
176    /// [Google Cloud Storage]: https://cloud.google.com/storage
177    ///
178    /// # Example
179    ///
180    /// ```yaml
181    /// long_term_storage:
182    ///   type: gcs
183    ///   bucket: objectstore-bucket
184    /// ```
185    Gcs {
186        /// Optional custom GCS endpoint URL.
187        ///
188        /// Useful for testing with emulators. If `None`, uses the default GCS endpoint.
189        ///
190        /// # Default
191        ///
192        /// `None` (uses default GCS endpoint)
193        ///
194        /// # Environment Variables
195        ///
196        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=gcs`
197        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=http://localhost:9000` (optional)
198        ///
199        /// Or for long-term storage:
200        /// - `OS__LONG_TERM_STORAGE__TYPE=gcs`
201        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=http://localhost:9000` (optional)
202        endpoint: Option<String>,
203
204        /// GCS bucket name.
205        ///
206        /// The bucket must exist before starting the server.
207        ///
208        /// # Environment Variables
209        ///
210        /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-gcs-bucket`
211        /// - `OS__LONG_TERM_STORAGE__BUCKET=my-gcs-bucket`
212        bucket: String,
213    },
214
215    /// [Google Bigtable] backend (type `"bigtable"`).
216    ///
217    /// Stores objects in Google Cloud Bigtable, a NoSQL wide-column database. This backend is
218    /// optimized for high-throughput, low-latency workloads with small objects. Authentication uses
219    /// Application Default Credentials (ADC).
220    ///
221    /// **Note**: The table must be pre-created with appropriate column families. Ensure to have the
222    /// following column families:
223    /// - `fg`: timestamp-based garbage collection (`maxage=1s`)
224    /// - `fm`: manual garbage collection (`no GC policy`)
225    ///
226    /// [Google Bigtable]: https://cloud.google.com/bigtable
227    ///
228    /// # Example
229    ///
230    /// ```yaml
231    /// high_volume_storage:
232    ///   type: bigtable
233    ///   project_id: my-project
234    ///   instance_name: objectstore
235    ///   table_name: objectstore
236    /// ```
237    BigTable {
238        /// Optional custom Bigtable endpoint.
239        ///
240        /// Useful for testing with emulators. If `None`, uses the default Bigtable endpoint.
241        ///
242        /// # Default
243        ///
244        /// `None` (uses default Bigtable endpoint)
245        ///
246        /// # Environment Variables
247        ///
248        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=bigtable`
249        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=localhost:8086` (optional)
250        ///
251        /// Or for long-term storage:
252        /// - `OS__LONG_TERM_STORAGE__TYPE=bigtable`
253        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=localhost:8086` (optional)
254        endpoint: Option<String>,
255
256        /// GCP project ID.
257        ///
258        /// The Google project ID (not project number) containing the Bigtable instance.
259        ///
260        /// # Environment Variables
261        ///
262        /// - `OS__HIGH_VOLUME_STORAGE__PROJECT_ID=my-project`
263        /// - `OS__LONG_TERM_STORAGE__PROJECT_ID=my-project`
264        project_id: String,
265
266        /// Bigtable instance name.
267        ///
268        /// # Environment Variables
269        ///
270        /// - `OS__HIGH_VOLUME_STORAGE__INSTANCE_NAME=my-instance`
271        /// - `OS__LONG_TERM_STORAGE__INSTANCE_NAME=my-instance`
272        instance_name: String,
273
274        /// Bigtable table name.
275        ///
276        /// The table must exist before starting the server.
277        ///
278        /// # Environment Variables
279        ///
280        /// - `OS__HIGH_VOLUME_STORAGE__TABLE_NAME=objectstore`
281        /// - `OS__LONG_TERM_STORAGE__TABLE_NAME=objectstore`
282        table_name: String,
283
284        /// Optional number of connections to maintain to Bigtable.
285        ///
286        /// # Default
287        ///
288        /// `None` (infers connection count based on CPU count)
289        ///
290        /// # Environment Variables
291        ///
292        /// - `OS__HIGH_VOLUME_STORAGE__CONNECTIONS=16` (optional)
293        /// - `OS__LONG_TERM_STORAGE__CONNECTIONS=16` (optional)
294        connections: Option<usize>,
295    },
296}
297
298/// Runtime configuration for the Tokio async runtime.
299///
300/// Controls the threading behavior of the server's async runtime.
301///
302/// Used in: [`Config::runtime`]
303#[derive(Debug, Clone, Deserialize, Serialize)]
304#[serde(default)]
305pub struct Runtime {
306    /// Number of worker threads for the server runtime.
307    ///
308    /// This controls the size of the Tokio thread pool used to execute async tasks. More threads
309    /// can improve concurrency for CPU-bound workloads, but too many threads can increase context
310    /// switching overhead.
311    ///
312    /// Set this in accordance with the resources available to the server, especially in Kubernetes
313    /// environments.
314    ///
315    /// # Default
316    ///
317    /// Defaults to the number of CPU cores on the host machine.
318    ///
319    /// # Environment Variable
320    ///
321    /// `OS__RUNTIME__WORKER_THREADS`
322    ///
323    /// # Considerations
324    ///
325    /// - For I/O-bound workloads, the default (number of CPU cores) is usually sufficient
326    /// - For CPU-intensive workloads, consider matching or exceeding the number of cores
327    /// - Setting this too high can lead to increased memory usage and context switching
328    pub worker_threads: usize,
329}
330
331impl Default for Runtime {
332    fn default() -> Self {
333        Self {
334            worker_threads: num_cpus::get(),
335        }
336    }
337}
338
339/// [Sentry](https://sentry.io/) error tracking and performance monitoring configuration.
340///
341/// Configures integration with Sentry for error tracking, performance monitoring, and distributed
342/// tracing. Sentry is disabled by default and only enabled when a DSN is provided.
343///
344/// Used in: [`Config::sentry`]
345#[derive(Debug, Clone, Deserialize, Serialize)]
346pub struct Sentry {
347    /// Sentry DSN (Data Source Name).
348    ///
349    /// When set, enables Sentry error tracking and performance monitoring. When `None`, Sentry
350    /// integration is completely disabled.
351    ///
352    /// # Default
353    ///
354    /// `None` (Sentry disabled)
355    ///
356    /// # Environment Variable
357    ///
358    /// `OS__SENTRY__DSN`
359    pub dsn: Option<SecretBox<ConfigSecret>>,
360
361    /// Environment name for this deployment.
362    ///
363    /// Used to distinguish events from different environments (e.g., "production", "staging",
364    /// "development"). This appears in the Sentry UI and can be used for filtering.
365    ///
366    /// # Default
367    ///
368    /// `None`
369    ///
370    /// # Environment Variable
371    ///
372    /// `OS__SENTRY__ENVIRONMENT`
373    pub environment: Option<Cow<'static, str>>,
374
375    /// Server name or identifier.
376    ///
377    /// Used to identify which server instance sent an event. Useful in multi-server deployments for
378    /// tracking which instance encountered an error. Set to the hostname or pod name of the server.
379    ///
380    /// # Default
381    ///
382    /// `None`
383    ///
384    /// # Environment Variable
385    ///
386    /// `OS__SENTRY__SERVER_NAME`
387    pub server_name: Option<Cow<'static, str>>,
388
389    /// Error event sampling rate.
390    ///
391    /// Controls what percentage of error events are sent to Sentry. A value of `1.0` sends all
392    /// errors, while `0.5` sends 50% of errors, and `0.0` sends no errors.
393    ///
394    /// # Default
395    ///
396    /// `1.0` (send all errors)
397    ///
398    /// # Environment Variable
399    ///
400    /// `OS__SENTRY__SAMPLE_RATE`
401    pub sample_rate: f32,
402
403    /// Performance trace sampling rate.
404    ///
405    /// Controls what percentage of transactions (traces) are sent to Sentry for performance
406    /// monitoring. A value of `1.0` sends all traces, while `0.01` sends 1% of traces.
407    ///
408    /// **Important**: Performance traces can generate significant data volume in high-traffic
409    /// systems. Start with a low rate (0.01-0.1) and adjust based on traffic and Sentry quota.
410    ///
411    /// # Default
412    ///
413    /// `0.01` (send 1% of traces)
414    ///
415    /// # Environment Variable
416    ///
417    /// `OS__SENTRY__TRACES_SAMPLE_RATE`
418    pub traces_sample_rate: f32,
419
420    /// Whether to inherit sampling decisions from incoming traces.
421    ///
422    /// When `true` (default), if an incoming request contains a distributed tracing header with a
423    /// sampling decision (e.g., from an upstream service), that decision is honored. When `false`,
424    /// the local `traces_sample_rate` is always used instead.
425    ///
426    /// When this is enabled, the calling service effectively controls the sampling decision for the
427    /// entire trace. Set this to `false` if you want to have independent sampling control at the
428    /// objectstore level.
429    ///
430    /// # Default
431    ///
432    /// `true`
433    ///
434    /// # Environment Variable
435    ///
436    /// `OS__SENTRY__INHERIT_SAMPLING_DECISION`
437    pub inherit_sampling_decision: bool,
438
439    /// Enable Sentry SDK debug mode.
440    ///
441    /// When enabled, the Sentry SDK will output debug information to stderr, which can be useful
442    /// for troubleshooting Sentry integration issues. It is discouraged to enable this in
443    /// production as it generates verbose logging.
444    ///
445    /// # Default
446    ///
447    /// `false`
448    ///
449    /// # Environment Variable
450    ///
451    /// `OS__SENTRY__DEBUG`
452    pub debug: bool,
453
454    /// Additional tags to attach to all Sentry events.
455    ///
456    /// Key-value pairs that are sent as tags with every event reported to Sentry. Useful for adding
457    /// context such as deployment identifiers or environment details.
458    ///
459    /// # Default
460    ///
461    /// Empty (no tags)
462    ///
463    /// # Environment Variables
464    ///
465    /// Each tag is set individually:
466    /// - `OS__SENTRY__TAGS__FOO=foo`
467    /// - `OS__SENTRY__TAGS__BAR=bar`
468    ///
469    /// # YAML Example
470    ///
471    /// ```yaml
472    /// sentry:
473    ///   tags:
474    ///     foo: foo
475    ///     bar: bar
476    /// ```
477    pub tags: BTreeMap<String, String>,
478}
479
480impl Sentry {
481    /// Returns whether Sentry integration is enabled.
482    ///
483    /// Sentry is considered enabled if a DSN is configured.
484    pub fn is_enabled(&self) -> bool {
485        self.dsn.is_some()
486    }
487}
488
489impl Default for Sentry {
490    fn default() -> Self {
491        Self {
492            dsn: None,
493            environment: None,
494            server_name: None,
495            sample_rate: 1.0,
496            traces_sample_rate: 0.01,
497            inherit_sampling_decision: true,
498            debug: false,
499            tags: BTreeMap::new(),
500        }
501    }
502}
503
504/// Log output format.
505///
506/// Controls how log messages are formatted. The format can be explicitly specified or
507/// auto-detected based on whether output is to a TTY.
508#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Deserialize, Serialize)]
509#[serde(rename_all = "lowercase")]
510pub enum LogFormat {
511    /// Auto detect the best format.
512    ///
513    /// This chooses [`LogFormat::Pretty`] for TTY, otherwise [`LogFormat::Simplified`].
514    Auto,
515
516    /// Pretty printing with colors.
517    ///
518    /// ```text
519    ///  INFO  objectstore::http > objectstore starting
520    /// ```
521    Pretty,
522
523    /// Simplified plain text output.
524    ///
525    /// ```text
526    /// 2020-12-04T12:10:32Z [objectstore::http] INFO: objectstore starting
527    /// ```
528    Simplified,
529
530    /// Dump out JSON lines.
531    ///
532    /// ```text
533    /// {"timestamp":"2020-12-04T12:11:08.729716Z","level":"INFO","logger":"objectstore::http","message":"objectstore starting","module_path":"objectstore::http","filename":"objectstore_service/src/http.rs","lineno":31}
534    /// ```
535    Json,
536}
537
/// Error returned when a string cannot be parsed as a logging format.
///
/// Carries the input that failed to parse.
#[derive(Debug, Clone)]
pub struct FormatParseError(String);

impl fmt::Display for FormatParseError {
    /// Names the rejected input and lists the accepted format names.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(input) = self;
        write!(
            f,
            r#"error parsing "{}" as format: expected one of "auto", "pretty", "simplified", "json""#,
            input
        )
    }
}
551
552impl std::str::FromStr for LogFormat {
553    type Err = FormatParseError;
554
555    fn from_str(s: &str) -> Result<Self, Self::Err> {
556        let result = match s {
557            "" => LogFormat::Auto,
558            s if s.eq_ignore_ascii_case("auto") => LogFormat::Auto,
559            s if s.eq_ignore_ascii_case("pretty") => LogFormat::Pretty,
560            s if s.eq_ignore_ascii_case("simplified") => LogFormat::Simplified,
561            s if s.eq_ignore_ascii_case("json") => LogFormat::Json,
562            s => return Err(FormatParseError(s.into())),
563        };
564
565        Ok(result)
566    }
567}
568
569impl std::error::Error for FormatParseError {}
570
571mod display_fromstr {
572    pub fn serialize<T, S>(value: &T, serializer: S) -> Result<S::Ok, S::Error>
573    where
574        S: serde::Serializer,
575        T: std::fmt::Display,
576    {
577        serializer.collect_str(&value)
578    }
579
580    pub fn deserialize<'de, T, D>(deserializer: D) -> Result<T, D::Error>
581    where
582        D: serde::Deserializer<'de>,
583        T: std::str::FromStr,
584        <T as std::str::FromStr>::Err: std::fmt::Display,
585    {
586        use serde::Deserialize;
587        let s = <std::borrow::Cow<'de, str>>::deserialize(deserializer)?;
588        s.parse().map_err(serde::de::Error::custom)
589    }
590}
591
592/// Logging configuration.
593///
594/// Controls the verbosity and format of log output. Logs are always written to stderr.
595///
596/// Used in: [`Config::logging`]
597#[derive(Debug, Clone, Deserialize, Serialize)]
598pub struct Logging {
599    /// Minimum log level to output.
600    ///
601    /// Controls which log messages are emitted based on their severity. Messages at or above this
602    /// level will be output. Valid levels in increasing severity: TRACE, DEBUG, INFO, WARN, ERROR,
603    /// OFF.
604    ///
605    /// The `RUST_LOG` environment variable provides more granular control per module if needed.
606    ///
607    /// **Important**: Levels `DEBUG` and `TRACE` are very verbose and can impact performance; use
608    /// only for debugging.
609    ///
610    /// # Default
611    ///
612    /// `INFO`
613    ///
614    /// # Environment Variable
615    ///
616    /// `OS__LOGGING__LEVEL`
617    ///
618    /// # Considerations
619    ///
620    /// - `TRACE` and `DEBUG` can be very verbose and impact performance; use only for debugging
621    /// - `INFO` is appropriate for production
622    /// - `WARN` or `ERROR` can be used to reduce log volume in high-traffic systems
623    /// -
624    #[serde(with = "display_fromstr")]
625    pub level: LevelFilter,
626
627    /// Log output format.
628    ///
629    /// Determines how log messages are formatted. See [`LogFormat`] for available options and
630    /// examples.
631    ///
632    /// # Default
633    ///
634    /// `Auto` (pretty for TTY, simplified otherwise)
635    ///
636    /// # Environment Variable
637    ///
638    /// `OS__LOGGING__FORMAT`
639    pub format: LogFormat,
640}
641
642impl Default for Logging {
643    fn default() -> Self {
644        Self {
645            level: LevelFilter::INFO,
646            format: LogFormat::Auto,
647        }
648    }
649}
650
651/// Metrics configuration.
652///
653/// Configures submission of internal metrics to Datadog.
654#[derive(Clone, Debug, Default, Deserialize, Serialize)]
655pub struct Metrics {
656    /// Datadog [API key] for metrics.
657    ///
658    /// When provided, enables metrics reporting to Datadog. Metrics include request counts,
659    /// latencies, storage operations, and more. The key is kept secret and redacted from logs.
660    ///
661    /// # Default
662    ///
663    /// `None` (Datadog metrics disabled)
664    ///
665    /// # Environment Variable
666    ///
667    /// `OS__METRICS__DATADOG_KEY`
668    ///
669    /// [API key]: https://docs.datadoghq.com/account_management/api-app-keys/#api-keys
670    pub datadog_key: Option<SecretBox<ConfigSecret>>,
671
672    /// Global tags applied to all metrics.
673    ///
674    /// Key-value pairs that are attached to every metric sent to Datadog. Useful for
675    /// identifying the environment, region, or other deployment-specific information.
676    ///
677    /// # Default
678    ///
679    /// Empty (no tags)
680    ///
681    /// # Environment Variables
682    ///
683    /// Each tag is set individually:
684    /// - `OS__METRICS__TAGS__FOO=foo`
685    /// - `OS__METRICS__TAGS__BAR=bar`
686    ///
687    /// # YAML Example
688    ///
689    /// ```yaml
690    /// metrics:
691    ///   tags:
692    ///     foo: foo
693    ///     bar: bar
694    /// ```
695    pub tags: BTreeMap<String, String>,
696}
697
698/// A key that may be used to verify a request's `Authorization` header and its
699/// associated permissions. May contain multiple key versions to facilitate rotation.
700#[derive(Debug, Clone, Deserialize, Serialize)]
701pub struct AuthZVerificationKey {
702    /// Versions of this key's key material which may be used to verify signatures.
703    ///
704    /// If a key is being rotated, the old and new versions of that key should both be
705    /// configured so objectstore can verify signatures while the updated key is still
706    /// rolling out. Otherwise, this should only contain the most recent version of a key.
707    pub key_versions: Vec<SecretBox<ConfigSecret>>,
708
709    /// The maximum set of permissions that this key's signer is authorized to grant.
710    ///
711    /// If a request's `Authorization` header grants full permission but it was signed by
712    /// a key that is only allowed to grant read permission, then the request only has
713    /// read permission.
714    #[serde(default)]
715    pub max_permissions: HashSet<Permission>,
716}
717
718/// Configuration for content-based authorization.
719#[derive(Debug, Default, Clone, Deserialize, Serialize)]
720pub struct AuthZ {
721    /// Whether to enforce content-based authorization or not.
722    ///
723    /// If this is set to `false`, checks are still performed but failures will not result
724    /// in `403 Unauthorized` responses.
725    pub enforce: bool,
726
727    /// Keys that may be used to verify a request's `Authorization` header.
728    ///
729    /// This field is a container that is keyed on a key's ID. When verifying a JWT
730    /// from the `Authorization` header, the `kid` field should be read from the JWT
731    /// header and used to index into this map to select the appropriate key.
732    #[serde(default)]
733    pub keys: BTreeMap<String, AuthZVerificationKey>,
734}
735
736/// Main configuration struct for the objectstore server.
737///
738/// This is the top-level configuration that combines all server settings including networking,
739/// storage backends, runtime, and observability options.
740///
741/// Configuration is loaded with the following precedence (highest to lowest):
742/// 1. Environment variables (prefixed with `OS__`)
743/// 2. YAML configuration file (if provided via `-c` flag)
744/// 3. Default values
745///
746/// See individual field documentation for details on each configuration option, including
747/// defaults and environment variables.
748#[derive(Debug, Clone, Deserialize, Serialize)]
749pub struct Config {
750    /// HTTP server bind address.
751    ///
752    /// The socket address (IP and port) where the HTTP server will listen for incoming
753    /// connections. Supports both IPv4 and IPv6 addresses. Note that binding to `0.0.0.0`
754    /// makes the server accessible from all network interfaces.
755    ///
756    /// # Default
757    ///
758    /// `0.0.0.0:8888` (listens on all network interfaces, port 8888)
759    ///
760    /// # Environment Variable
761    ///
762    /// `OS__HTTP_ADDR`
763    pub http_addr: SocketAddr,
764
765    /// Storage backend for high-volume, small objects.
766    ///
767    /// This backend is used for smaller objects in scenarios where high-throughput, low-latency
768    /// access with many small objects is desired. Good candidates include Bigtable, local
769    /// filesystem (for development), or fast SSDs. Can be set to the same backend as
770    /// `long_term_storage` for simplicity.
771    ///
772    /// **Note**: Currently, objects up to 1 MiB are stored in this backend, while larger objects
773    /// are stored in the [`long_term_storage`](`Config::long_term_storage`). This is subject to
774    /// change in the future and more configuration options will be added to influence this
775    /// decision.
776    ///
777    /// # Default
778    ///
779    /// Filesystem storage in `./data/high-volume` directory
780    ///
781    /// # Environment Variables
782    ///
783    /// - `OS__HIGH_VOLUME_STORAGE__TYPE` for the backend type. See [`Storage`] for available
784    ///   options.
785    ///
786    /// # Example
787    ///
788    /// ```yaml
789    /// high_volume_storage:
790    ///   type: bigtable
791    ///   project_id: my-project
792    ///   instance_name: objectstore
793    ///   table_name: objectstore
794    /// ```
795    pub high_volume_storage: Storage,
796
797    /// Storage backend for large objects with long-term retention.
798    ///
799    /// This backend is used for larger objects in scenarios with lower throughput and higher
800    /// latency requirements. Good candidates include S3, Google Cloud Storage, or other object
801    /// storage systems. Can be set to the same backend as `high_volume_storage` for simplicity.
802    ///
803    /// **Note**: Currently, objects over 1 MiB are stored in this backend, while smaller objects
804    /// are stored in the [`high_volume_storage`](`Config::high_volume_storage`). This is subject to
805    /// change in the future and more configuration options will be added to influence this
806    /// decision.
807    ///
808    /// # Default
809    ///
810    /// Filesystem storage in `./data/long-term` directory
811    ///
812    /// # Environment Variables
813    ///
814    /// - `OS__LONG_TERM_STORAGE__TYPE` - Backend type (filesystem, s3compatible, gcs, bigtable)
815    /// - Additional fields depending on the type (see [`Storage`])
816    ///
817    /// # Example
818    ///
819    /// ```yaml
820    /// long_term_storage:
821    ///   type: gcs
822    ///   bucket: my-objectstore-bucket
823    /// ```
824    pub long_term_storage: Storage,
825
826    /// Configuration of the internal task runtime.
827    ///
828    /// Controls the thread pool size and behavior of the async runtime powering the server.
829    /// See [`Runtime`] for configuration options.
830    pub runtime: Runtime,
831
832    /// Logging configuration.
833    ///
834    /// Controls log verbosity and output format. See [`Logging`] for configuration options.
835    pub logging: Logging,
836
837    /// Sentry error tracking configuration.
838    ///
839    /// Optional integration with Sentry for error tracking and performance monitoring.
840    /// See [`Sentry`] for configuration options.
841    pub sentry: Sentry,
842
843    /// Internal metrics configuration.
844    ///
845    /// Optional configuration for submitting internal metrics to Datadog. See [`Metrics`] for
846    /// configuration options.
847    pub metrics: Metrics,
848
849    /// Content-based authorization configuration.
850    ///
851    /// Controls the verification and enforcement of content-based access control based on the
852    /// JWT in a request's `Authorization` header.
853    pub auth: AuthZ,
854}
855
856impl Default for Config {
857    fn default() -> Self {
858        Self {
859            http_addr: "0.0.0.0:8888".parse().unwrap(),
860
861            high_volume_storage: Storage::FileSystem {
862                path: PathBuf::from("data/high-volume"),
863            },
864            long_term_storage: Storage::FileSystem {
865                path: PathBuf::from("data/long-term"),
866            },
867
868            runtime: Runtime::default(),
869            logging: Logging::default(),
870            sentry: Sentry::default(),
871            metrics: Metrics::default(),
872            auth: AuthZ::default(),
873        }
874    }
875}
876
877impl Config {
878    /// Loads configuration from the provided arguments.
879    ///
880    /// Configuration is merged in the following order (later sources override earlier ones):
881    /// 1. Default values
882    /// 2. YAML configuration file (if provided in `args`)
883    /// 3. Environment variables (prefixed with `OS__`)
884    ///
885    /// # Errors
886    ///
887    /// Returns an error if:
888    /// - The YAML configuration file cannot be read or parsed
889    /// - Environment variables contain invalid values
890    /// - Required fields are missing or invalid
891    pub fn load(path: Option<&Path>) -> Result<Self> {
892        let mut figment = figment::Figment::from(Serialized::defaults(Config::default()));
893        if let Some(path) = path {
894            figment = figment.merge(Yaml::file(path));
895        }
896        let config = figment
897            .merge(Env::prefixed(ENV_PREFIX).split("__"))
898            .extract()?;
899
900        Ok(config)
901    }
902}
903
#[cfg(test)]
mod tests {
    use std::io::Write;

    use secrecy::ExposeSecret;

    use super::*;

    /// Writes `contents` to a new temporary file and returns its handle.
    ///
    /// Callers should keep the returned handle alive for as long as they use its path.
    fn write_yaml(contents: &str) -> tempfile::NamedTempFile {
        let mut file = tempfile::NamedTempFile::new().unwrap();
        file.write_all(contents.as_bytes()).unwrap();
        file
    }

    /// Asserts that the long-term storage is S3-compatible with the given endpoint and bucket.
    ///
    /// On a mismatching storage variant the whole configuration is included in the panic message
    /// to ease debugging.
    fn assert_s3_storage(config: &Config, expected_endpoint: &str, expected_bucket: &str) {
        let Storage::S3Compatible { endpoint, bucket } = &config.long_term_storage else {
            panic!("expected s3 storage, got config: {config:?}");
        };
        assert_eq!(endpoint, expected_endpoint);
        assert_eq!(bucket, expected_bucket);
    }

    /// Asserts the Sentry settings shared by the env and YAML tests.
    ///
    /// The two sample rates intentionally differ so that a mix-up between the fields is caught.
    fn assert_sentry(sentry: &Sentry) {
        let dsn = sentry.dsn.as_ref().expect("sentry dsn should be set");
        assert_eq!(dsn.expose_secret().as_str(), "abcde");
        assert_eq!(sentry.environment.as_deref(), Some("production"));
        assert_eq!(sentry.server_name.as_deref(), Some("objectstore-deadbeef"));
        assert_eq!(sentry.sample_rate, 0.5);
        assert_eq!(sentry.traces_sample_rate, 0.25);
    }

    /// Asserts the auth settings shared by the env and YAML tests.
    fn assert_auth(config: &Config) {
        assert!(config.auth.enforce);

        let kid1 = config.auth.keys.get("kid1").unwrap();
        assert_eq!(kid1.key_versions[0].expose_secret().as_str(), "abcde");
        assert_eq!(kid1.key_versions[1].expose_secret().as_str(), "fghij");
        assert_eq!(
            kid1.key_versions[2].expose_secret().as_str(),
            "this is a test\n  multiline string\nend of string\n"
        );
        assert_eq!(
            kid1.max_permissions,
            HashSet::from([Permission::ObjectRead, Permission::ObjectWrite])
        );

        // Keys without explicit permissions end up with an empty permission set.
        let kid2 = config.auth.keys.get("kid2").unwrap();
        assert_eq!(kid2.key_versions[0].expose_secret().as_str(), "12345");
        assert_eq!(kid2.max_permissions, HashSet::new());
    }

    /// Nested settings can be provided purely through `OS__`-prefixed environment variables.
    #[test]
    fn configurable_via_env() {
        figment::Jail::expect_with(|jail| {
            jail.set_env("OS__LONG_TERM_STORAGE__TYPE", "s3compatible");
            jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:8888");
            jail.set_env("OS__LONG_TERM_STORAGE__BUCKET", "whatever");
            jail.set_env("OS__METRICS__TAGS__FOO", "bar");
            jail.set_env("OS__METRICS__TAGS__BAZ", "qux");
            jail.set_env("OS__SENTRY__DSN", "abcde");
            jail.set_env("OS__SENTRY__SAMPLE_RATE", "0.5");
            jail.set_env("OS__SENTRY__ENVIRONMENT", "production");
            jail.set_env("OS__SENTRY__SERVER_NAME", "objectstore-deadbeef");
            jail.set_env("OS__SENTRY__TRACES_SAMPLE_RATE", "0.25");

            let config = Config::load(None).unwrap();

            assert_s3_storage(&config, "http://localhost:8888", "whatever");
            assert_eq!(
                config.metrics.tags,
                [("foo".into(), "bar".into()), ("baz".into(), "qux".into())].into()
            );
            assert_sentry(&config.sentry);

            Ok(())
        });
    }

    /// The same settings can be provided through a YAML configuration file.
    #[test]
    fn configurable_via_yaml() {
        let config_file = write_yaml(
            r#"
            long_term_storage:
                type: s3compatible
                endpoint: http://localhost:8888
                bucket: whatever
            sentry:
                dsn: abcde
                environment: production
                server_name: objectstore-deadbeef
                sample_rate: 0.5
                traces_sample_rate: 0.25
            "#,
        );

        figment::Jail::expect_with(|_jail| {
            let config = Config::load(Some(config_file.path())).unwrap();

            assert_s3_storage(&config, "http://localhost:8888", "whatever");
            assert_sentry(&config.sentry);

            Ok(())
        });
    }

    /// Environment variables override individual YAML values without discarding their siblings.
    #[test]
    fn configured_with_env_and_yaml() {
        let config_file = write_yaml(
            r#"
            long_term_storage:
                type: s3compatible
                endpoint: http://localhost:8888
                bucket: whatever
            "#,
        );

        figment::Jail::expect_with(|jail| {
            jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:9001");

            let config = Config::load(Some(config_file.path())).unwrap();

            // Env should overwrite the yaml endpoint, while the bucket still comes from yaml.
            assert_s3_storage(&config, "http://localhost:9001", "whatever");

            Ok(())
        });
    }

    /// Auth keys can be provided as a single structured environment variable.
    #[test]
    fn configure_auth_with_env() {
        figment::Jail::expect_with(|jail| {
            jail.set_env("OS__AUTH__ENFORCE", "true");
            jail.set_env(
                "OS__AUTH__KEYS",
                r#"{kid1={key_versions=["abcde","fghij","this is a test\n  multiline string\nend of string\n"],max_permissions=["object.read", "object.write"],}, kid2={key_versions=["12345"],}}"#,
            );

            let config = Config::load(None).unwrap();

            assert_auth(&config);

            Ok(())
        });
    }

    /// Auth keys can be provided through YAML, including multiline key material.
    #[test]
    fn configure_auth_with_yaml() {
        let config_file = write_yaml(
            r#"
            auth:
                enforce: true
                keys:
                    kid1:
                        key_versions:
                            - "abcde"
                            - "fghij"
                            - |
                              this is a test
                                multiline string
                              end of string
                        max_permissions:
                            - "object.read"
                            - "object.write"
                    kid2:
                        key_versions:
                            - "12345"
            "#,
        );

        figment::Jail::expect_with(|_jail| {
            let config = Config::load(Some(config_file.path())).unwrap();

            assert_auth(&config);

            Ok(())
        });
    }
}