objectstore_server/
config.rs

1//! Configuration for the objectstore server.
2//!
3//! This module provides the configuration system for the objectstore HTTP server. Configuration can
4//! be loaded from multiple sources with the following precedence (highest to lowest):
5//!
6//! 1. Environment variables (prefixed with `OS__`)
7//! 2. YAML configuration file (specified via `-c` or `--config` flag)
8//! 3. Defaults
9//!
10//! See [`Config`] for a description of all configuration fields and their defaults.
11//!
12//! # Environment Variables
13//!
14//! Environment variables use `OS__` as a prefix and double underscores (`__`) to denote nested
15//! configuration structures. For example:
16//!
17//! - `OS__HTTP_ADDR=0.0.0.0:8888` sets the HTTP server address
18//! - `OS__LONG_TERM_STORAGE__TYPE=filesystem` sets the storage type
19//! - `OS__LONG_TERM_STORAGE__PATH=/data` sets the directory name
20//!
21//! # YAML Configuration File
22//!
23//! Configuration can also be provided via a YAML file. The above configuration in YAML format would
24//! look like this:
25//!
26//! ```yaml
27//! http_addr: 0.0.0.0:8888
28//!
29//! long_term_storage:
30//!   type: filesystem
31//!   path: /data
32//! ```
33
34use std::borrow::Cow;
35use std::collections::BTreeMap;
36use std::fmt;
37use std::net::SocketAddr;
38use std::path::{Path, PathBuf};
39
40use anyhow::Result;
41use figment::providers::{Env, Format, Serialized, Yaml};
42use secrecy::{CloneableSecret, SecretBox, SerializableSecret, zeroize::Zeroize};
43use serde::{Deserialize, Serialize};
44use tracing::level_filters::LevelFilter;
45
/// Prefix shared by every environment variable that is read into the configuration.
///
/// Nested fields are addressed by appending further `__`-separated segments, for example
/// `OS__LOGGING__LEVEL`.
const ENV_PREFIX: &str = "OS__";
48
49/// Newtype around `String` that may protect against accidental
50/// logging of secrets in our configuration struct. Use with
51/// [`secrecy::SecretBox`].
52#[derive(Clone, Default, Serialize, Deserialize, PartialEq)]
53pub struct ConfigSecret(String);
54
55impl ConfigSecret {
56    pub fn as_str(&self) -> &str {
57        self.0.as_str()
58    }
59}
60
61impl std::ops::Deref for ConfigSecret {
62    type Target = str;
63    fn deref(&self) -> &Self::Target {
64        &self.0
65    }
66}
67
68impl fmt::Debug for ConfigSecret {
69    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
70        write!(f, "[redacted]")
71    }
72}
73
74impl CloneableSecret for ConfigSecret {}
75impl SerializableSecret for ConfigSecret {}
76impl Zeroize for ConfigSecret {
77    fn zeroize(&mut self) {
78        self.0.zeroize();
79    }
80}
81
82/// Storage backend configuration.
83///
84/// The `type` field in YAML or `__TYPE` in environment variables determines which variant is used.
85///
86/// Used in: [`Config::high_volume_storage`], [`Config::long_term_storage`]
87#[derive(Debug, Clone, Deserialize, Serialize)]
88#[serde(tag = "type", rename_all = "lowercase")]
89pub enum Storage {
90    /// Local filesystem storage backend (type `"filesystem"`).
91    ///
92    /// Stores objects as files on the local filesystem. Suitable for development, testing,
93    /// and single-server deployments.
94    ///
95    /// # Example
96    ///
97    /// ```yaml
98    /// long_term_storage:
99    ///   type: filesystem
100    ///   path: /data
101    /// ```
102    FileSystem {
103        /// Directory path for storing objects.
104        ///
105        /// The directory will be created if it doesn't exist. Relative paths are resolved from
106        /// the server's working directory.
107        ///
108        /// # Default
109        ///
110        /// `"data"` (relative to the server's working directory)
111        ///
112        /// # Environment Variables
113        ///
114        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=filesystem`
115        /// - `OS__HIGH_VOLUME_STORAGE__PATH=/path/to/storage`
116        ///
117        /// Or for long-term storage:
118        /// - `OS__LONG_TERM_STORAGE__TYPE=filesystem`
119        /// - `OS__LONG_TERM_STORAGE__PATH=/path/to/storage`
120        path: PathBuf,
121    },
122
123    /// S3-compatible storage backend (type `"s3compatible"`).
124    ///
125    /// Supports [Amazon S3] and other S3-compatible services. Authentication is handled via
126    /// environment variables (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`) or IAM roles.
127    ///
128    /// [Amazon S3]: https://aws.amazon.com/s3/
129    ///
130    /// # Example
131    ///
132    /// ```yaml
133    /// long_term_storage:
134    ///   type: s3compatible
135    ///   endpoint: https://s3.amazonaws.com
136    ///   bucket: my-bucket
137    /// ```
138    S3Compatible {
139        /// S3 endpoint URL.
140        ///
141        /// Examples: `https://s3.amazonaws.com`, `http://localhost:9000` (for MinIO)
142        ///
143        /// # Environment Variables
144        ///
145        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=s3compatible`
146        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=https://s3.amazonaws.com`
147        ///
148        /// Or for long-term storage:
149        /// - `OS__LONG_TERM_STORAGE__TYPE=s3compatible`
150        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=https://s3.amazonaws.com`
151        endpoint: String,
152
153        /// S3 bucket name.
154        ///
155        /// The bucket must exist before starting the server.
156        ///
157        /// # Environment Variables
158        ///
159        /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-bucket`
160        /// - `OS__LONG_TERM_STORAGE__BUCKET=my-bucket`
161        bucket: String,
162    },
163
164    /// [Google Cloud Storage] backend (type `"gcs"`).
165    ///
166    /// Stores objects in Google Cloud Storage (GCS). Authentication uses Application Default
167    /// Credentials (ADC), which can be provided via the `GOOGLE_APPLICATION_CREDENTIALS`
168    /// environment variable or GCE/GKE metadata service.
169    ///
170    /// **Note**: The bucket must be pre-created with the following lifecycle policy:
171    /// - `daysSinceCustomTime`: 1 day
172    /// - `action`: delete
173    ///
174    /// [Google Cloud Storage]: https://cloud.google.com/storage
175    ///
176    /// # Example
177    ///
178    /// ```yaml
179    /// long_term_storage:
180    ///   type: gcs
181    ///   bucket: objectstore-bucket
182    /// ```
183    Gcs {
184        /// Optional custom GCS endpoint URL.
185        ///
186        /// Useful for testing with emulators. If `None`, uses the default GCS endpoint.
187        ///
188        /// # Default
189        ///
190        /// `None` (uses default GCS endpoint)
191        ///
192        /// # Environment Variables
193        ///
194        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=gcs`
195        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=http://localhost:9000` (optional)
196        ///
197        /// Or for long-term storage:
198        /// - `OS__LONG_TERM_STORAGE__TYPE=gcs`
199        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=http://localhost:9000` (optional)
200        endpoint: Option<String>,
201
202        /// GCS bucket name.
203        ///
204        /// The bucket must exist before starting the server.
205        ///
206        /// # Environment Variables
207        ///
208        /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-gcs-bucket`
209        /// - `OS__LONG_TERM_STORAGE__BUCKET=my-gcs-bucket`
210        bucket: String,
211    },
212
213    /// [Google Bigtable] backend (type `"bigtable"`).
214    ///
215    /// Stores objects in Google Cloud Bigtable, a NoSQL wide-column database. This backend is
216    /// optimized for high-throughput, low-latency workloads with small objects. Authentication uses
217    /// Application Default Credentials (ADC).
218    ///
219    /// **Note**: The table must be pre-created with appropriate column families. Ensure to have the
220    /// following column families:
221    /// - `fg`: timestamp-based garbage collection (`maxage=1s`)
222    /// - `fm`: manual garbage collection (`no GC policy`)
223    ///
224    /// [Google Bigtable]: https://cloud.google.com/bigtable
225    ///
226    /// # Example
227    ///
228    /// ```yaml
229    /// high_volume_storage:
230    ///   type: bigtable
231    ///   project_id: my-project
232    ///   instance_name: objectstore
233    ///   table_name: objectstore
234    /// ```
235    BigTable {
236        /// Optional custom Bigtable endpoint.
237        ///
238        /// Useful for testing with emulators. If `None`, uses the default Bigtable endpoint.
239        ///
240        /// # Default
241        ///
242        /// `None` (uses default Bigtable endpoint)
243        ///
244        /// # Environment Variables
245        ///
246        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=bigtable`
247        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=localhost:8086` (optional)
248        ///
249        /// Or for long-term storage:
250        /// - `OS__LONG_TERM_STORAGE__TYPE=bigtable`
251        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=localhost:8086` (optional)
252        endpoint: Option<String>,
253
254        /// GCP project ID.
255        ///
256        /// The Google project ID (not project number) containing the Bigtable instance.
257        ///
258        /// # Environment Variables
259        ///
260        /// - `OS__HIGH_VOLUME_STORAGE__PROJECT_ID=my-project`
261        /// - `OS__LONG_TERM_STORAGE__PROJECT_ID=my-project`
262        project_id: String,
263
264        /// Bigtable instance name.
265        ///
266        /// # Environment Variables
267        ///
268        /// - `OS__HIGH_VOLUME_STORAGE__INSTANCE_NAME=my-instance`
269        /// - `OS__LONG_TERM_STORAGE__INSTANCE_NAME=my-instance`
270        instance_name: String,
271
272        /// Bigtable table name.
273        ///
274        /// The table must exist before starting the server.
275        ///
276        /// # Environment Variables
277        ///
278        /// - `OS__HIGH_VOLUME_STORAGE__TABLE_NAME=objectstore`
279        /// - `OS__LONG_TERM_STORAGE__TABLE_NAME=objectstore`
280        table_name: String,
281
282        /// Optional number of connections to maintain to Bigtable.
283        ///
284        /// # Default
285        ///
286        /// `None` (infers connection count based on CPU count)
287        ///
288        /// # Environment Variables
289        ///
290        /// - `OS__HIGH_VOLUME_STORAGE__CONNECTIONS=16` (optional)
291        /// - `OS__LONG_TERM_STORAGE__CONNECTIONS=16` (optional)
292        connections: Option<usize>,
293    },
294}
295
296/// Runtime configuration for the Tokio async runtime.
297///
298/// Controls the threading behavior of the server's async runtime.
299///
300/// Used in: [`Config::runtime`]
301#[derive(Debug, Clone, Deserialize, Serialize)]
302#[serde(default)]
303pub struct Runtime {
304    /// Number of worker threads for the server runtime.
305    ///
306    /// This controls the size of the Tokio thread pool used to execute async tasks. More threads
307    /// can improve concurrency for CPU-bound workloads, but too many threads can increase context
308    /// switching overhead.
309    ///
310    /// Set this in accordance with the resources available to the server, especially in Kubernetes
311    /// environments.
312    ///
313    /// # Default
314    ///
315    /// Defaults to the number of CPU cores on the host machine.
316    ///
317    /// # Environment Variable
318    ///
319    /// `OS__RUNTIME__WORKER_THREADS`
320    ///
321    /// # Considerations
322    ///
323    /// - For I/O-bound workloads, the default (number of CPU cores) is usually sufficient
324    /// - For CPU-intensive workloads, consider matching or exceeding the number of cores
325    /// - Setting this too high can lead to increased memory usage and context switching
326    pub worker_threads: usize,
327}
328
329impl Default for Runtime {
330    fn default() -> Self {
331        Self {
332            worker_threads: num_cpus::get(),
333        }
334    }
335}
336
337/// [Sentry](https://sentry.io/) error tracking and performance monitoring configuration.
338///
339/// Configures integration with Sentry for error tracking, performance monitoring, and distributed
340/// tracing. Sentry is disabled by default and only enabled when a DSN is provided.
341///
342/// Used in: [`Config::sentry`]
343#[derive(Debug, Clone, Deserialize, Serialize)]
344pub struct Sentry {
345    /// Sentry DSN (Data Source Name).
346    ///
347    /// When set, enables Sentry error tracking and performance monitoring. When `None`, Sentry
348    /// integration is completely disabled.
349    ///
350    /// # Default
351    ///
352    /// `None` (Sentry disabled)
353    ///
354    /// # Environment Variable
355    ///
356    /// `OS__SENTRY__DSN`
357    pub dsn: Option<SecretBox<ConfigSecret>>,
358
359    /// Environment name for this deployment.
360    ///
361    /// Used to distinguish events from different environments (e.g., "production", "staging",
362    /// "development"). This appears in the Sentry UI and can be used for filtering.
363    ///
364    /// # Default
365    ///
366    /// `None`
367    ///
368    /// # Environment Variable
369    ///
370    /// `OS__SENTRY__ENVIRONMENT`
371    pub environment: Option<Cow<'static, str>>,
372
373    /// Server name or identifier.
374    ///
375    /// Used to identify which server instance sent an event. Useful in multi-server deployments for
376    /// tracking which instance encountered an error. Set to the hostname or pod name of the server.
377    ///
378    /// # Default
379    ///
380    /// `None`
381    ///
382    /// # Environment Variable
383    ///
384    /// `OS__SENTRY__SERVER_NAME`
385    pub server_name: Option<Cow<'static, str>>,
386
387    /// Error event sampling rate.
388    ///
389    /// Controls what percentage of error events are sent to Sentry. A value of `1.0` sends all
390    /// errors, while `0.5` sends 50% of errors, and `0.0` sends no errors.
391    ///
392    /// # Default
393    ///
394    /// `1.0` (send all errors)
395    ///
396    /// # Environment Variable
397    ///
398    /// `OS__SENTRY__SAMPLE_RATE`
399    pub sample_rate: f32,
400
401    /// Performance trace sampling rate.
402    ///
403    /// Controls what percentage of transactions (traces) are sent to Sentry for performance
404    /// monitoring. A value of `1.0` sends all traces, while `0.01` sends 1% of traces.
405    ///
406    /// **Important**: Performance traces can generate significant data volume in high-traffic
407    /// systems. Start with a low rate (0.01-0.1) and adjust based on traffic and Sentry quota.
408    ///
409    /// # Default
410    ///
411    /// `0.01` (send 1% of traces)
412    ///
413    /// # Environment Variable
414    ///
415    /// `OS__SENTRY__TRACES_SAMPLE_RATE`
416    pub traces_sample_rate: f32,
417
418    /// Whether to inherit sampling decisions from incoming traces.
419    ///
420    /// When `true` (default), if an incoming request contains a distributed tracing header with a
421    /// sampling decision (e.g., from an upstream service), that decision is honored. When `false`,
422    /// the local `traces_sample_rate` is always used instead.
423    ///
424    /// When this is enabled, the calling service effectively controls the sampling decision for the
425    /// entire trace. Set this to `false` if you want to have independent sampling control at the
426    /// objectstore level.
427    ///
428    /// # Default
429    ///
430    /// `true`
431    ///
432    /// # Environment Variable
433    ///
434    /// `OS__SENTRY__INHERIT_SAMPLING_DECISION`
435    pub inherit_sampling_decision: bool,
436
437    /// Enable Sentry SDK debug mode.
438    ///
439    /// When enabled, the Sentry SDK will output debug information to stderr, which can be useful
440    /// for troubleshooting Sentry integration issues. It is discouraged to enable this in
441    /// production as it generates verbose logging.
442    ///
443    /// # Default
444    ///
445    /// `false`
446    ///
447    /// # Environment Variable
448    ///
449    /// `OS__SENTRY__DEBUG`
450    pub debug: bool,
451}
452
453impl Sentry {
454    /// Returns whether Sentry integration is enabled.
455    ///
456    /// Sentry is considered enabled if a DSN is configured.
457    pub fn is_enabled(&self) -> bool {
458        self.dsn.is_some()
459    }
460}
461
462impl Default for Sentry {
463    fn default() -> Self {
464        Self {
465            dsn: None,
466            environment: None,
467            server_name: None,
468            sample_rate: 1.0,
469            traces_sample_rate: 0.01,
470            inherit_sampling_decision: true,
471            debug: false,
472        }
473    }
474}
475
476/// Log output format.
477///
478/// Controls how log messages are formatted. The format can be explicitly specified or
479/// auto-detected based on whether output is to a TTY.
480#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Deserialize, Serialize)]
481#[serde(rename_all = "lowercase")]
482pub enum LogFormat {
483    /// Auto detect the best format.
484    ///
485    /// This chooses [`LogFormat::Pretty`] for TTY, otherwise [`LogFormat::Simplified`].
486    Auto,
487
488    /// Pretty printing with colors.
489    ///
490    /// ```text
491    ///  INFO  objectstore::http > objectstore starting
492    /// ```
493    Pretty,
494
495    /// Simplified plain text output.
496    ///
497    /// ```text
498    /// 2020-12-04T12:10:32Z [objectstore::http] INFO: objectstore starting
499    /// ```
500    Simplified,
501
502    /// Dump out JSON lines.
503    ///
504    /// ```text
505    /// {"timestamp":"2020-12-04T12:11:08.729716Z","level":"INFO","logger":"objectstore::http","message":"objectstore starting","module_path":"objectstore::http","filename":"objectstore_service/src/http.rs","lineno":31}
506    /// ```
507    Json,
508}
509
/// Error returned when a string does not name a known log format.
///
/// Carries the input that failed to parse.
#[derive(Debug, Clone)]
pub struct FormatParseError(String);

impl fmt::Display for FormatParseError {
    /// Writes the rejected input followed by the list of accepted format names.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(input) = self;
        write!(
            f,
            r#"error parsing "{input}" as format: expected one of "auto", "pretty", "simplified", "json""#
        )
    }
}
523
524impl std::str::FromStr for LogFormat {
525    type Err = FormatParseError;
526
527    fn from_str(s: &str) -> Result<Self, Self::Err> {
528        let result = match s {
529            "" => LogFormat::Auto,
530            s if s.eq_ignore_ascii_case("auto") => LogFormat::Auto,
531            s if s.eq_ignore_ascii_case("pretty") => LogFormat::Pretty,
532            s if s.eq_ignore_ascii_case("simplified") => LogFormat::Simplified,
533            s if s.eq_ignore_ascii_case("json") => LogFormat::Json,
534            s => return Err(FormatParseError(s.into())),
535        };
536
537        Ok(result)
538    }
539}
540
541impl std::error::Error for FormatParseError {}
542
543mod display_fromstr {
544    pub fn serialize<T, S>(value: &T, serializer: S) -> Result<S::Ok, S::Error>
545    where
546        S: serde::Serializer,
547        T: std::fmt::Display,
548    {
549        serializer.collect_str(&value)
550    }
551
552    pub fn deserialize<'de, T, D>(deserializer: D) -> Result<T, D::Error>
553    where
554        D: serde::Deserializer<'de>,
555        T: std::str::FromStr,
556        <T as std::str::FromStr>::Err: std::fmt::Display,
557    {
558        use serde::Deserialize;
559        let s = <std::borrow::Cow<'de, str>>::deserialize(deserializer)?;
560        s.parse().map_err(serde::de::Error::custom)
561    }
562}
563
564/// Logging configuration.
565///
566/// Controls the verbosity and format of log output. Logs are always written to stderr.
567///
568/// Used in: [`Config::logging`]
569#[derive(Debug, Clone, Deserialize, Serialize)]
570pub struct Logging {
571    /// Minimum log level to output.
572    ///
573    /// Controls which log messages are emitted based on their severity. Messages at or above this
574    /// level will be output. Valid levels in increasing severity: TRACE, DEBUG, INFO, WARN, ERROR,
575    /// OFF.
576    ///
577    /// The `RUST_LOG` environment variable provides more granular control per module if needed.
578    ///
579    /// **Important**: Levels `DEBUG` and `TRACE` are very verbose and can impact performance; use
580    /// only for debugging.
581    ///
582    /// # Default
583    ///
584    /// `INFO`
585    ///
586    /// # Environment Variable
587    ///
588    /// `OS__LOGGING__LEVEL`
589    ///
590    /// # Considerations
591    ///
592    /// - `TRACE` and `DEBUG` can be very verbose and impact performance; use only for debugging
593    /// - `INFO` is appropriate for production
594    /// - `WARN` or `ERROR` can be used to reduce log volume in high-traffic systems
595    /// -
596    #[serde(with = "display_fromstr")]
597    pub level: LevelFilter,
598
599    /// Log output format.
600    ///
601    /// Determines how log messages are formatted. See [`LogFormat`] for available options and
602    /// examples.
603    ///
604    /// # Default
605    ///
606    /// `Auto` (pretty for TTY, simplified otherwise)
607    ///
608    /// # Environment Variable
609    ///
610    /// `OS__LOGGING__FORMAT`
611    pub format: LogFormat,
612}
613
614impl Default for Logging {
615    fn default() -> Self {
616        Self {
617            level: LevelFilter::INFO,
618            format: LogFormat::Auto,
619        }
620    }
621}
622
623/// Metrics configuration.
624///
625/// Configures submission of internal metrics to Datadog.
626#[derive(Clone, Debug, Default, Deserialize, Serialize)]
627pub struct Metrics {
628    /// Datadog [API key] for metrics.
629    ///
630    /// When provided, enables metrics reporting to Datadog. Metrics include request counts,
631    /// latencies, storage operations, and more. The key is kept secret and redacted from logs.
632    ///
633    /// # Default
634    ///
635    /// `None` (Datadog metrics disabled)
636    ///
637    /// # Environment Variable
638    ///
639    /// `OS__METRICS__DATADOG_KEY`
640    ///
641    /// [API key]: https://docs.datadoghq.com/account_management/api-app-keys/#api-keys
642    pub datadog_key: Option<SecretBox<ConfigSecret>>,
643
644    /// Global tags applied to all metrics.
645    ///
646    /// Key-value pairs that are attached to every metric sent to Datadog. Useful for
647    /// identifying the environment, region, or other deployment-specific information.
648    ///
649    /// # Default
650    ///
651    /// Empty (no tags)
652    ///
653    /// # Environment Variables
654    ///
655    /// Each tag is set individually:
656    /// - `OS__METRICS__TAGS__FOO=foo`
657    /// - `OS__METRICS__TAGS__BAR=bar`
658    ///
659    /// # YAML Example
660    ///
661    /// ```yaml
662    /// metrics:
663    ///   tags:
664    ///     foo: foo
665    ///     bar: bar
666    /// ```
667    pub tags: BTreeMap<String, String>,
668}
669
670/// Main configuration struct for the objectstore server.
671///
672/// This is the top-level configuration that combines all server settings including networking,
673/// storage backends, runtime, and observability options.
674///
675/// Configuration is loaded with the following precedence (highest to lowest):
676/// 1. Environment variables (prefixed with `OS__`)
677/// 2. YAML configuration file (if provided via `-c` flag)
678/// 3. Default values
679///
680/// See individual field documentation for details on each configuration option, including
681/// defaults and environment variables.
682#[derive(Debug, Clone, Deserialize, Serialize)]
683pub struct Config {
684    /// HTTP server bind address.
685    ///
686    /// The socket address (IP and port) where the HTTP server will listen for incoming
687    /// connections. Supports both IPv4 and IPv6 addresses. Note that binding to `0.0.0.0`
688    /// makes the server accessible from all network interfaces.
689    ///
690    /// # Default
691    ///
692    /// `0.0.0.0:8888` (listens on all network interfaces, port 8888)
693    ///
694    /// # Environment Variable
695    ///
696    /// `OS__HTTP_ADDR`
697    pub http_addr: SocketAddr,
698
699    /// Storage backend for high-volume, small objects.
700    ///
701    /// This backend is used for smaller objects in scenarios where high-throughput, low-latency
702    /// access with many small objects is desired. Good candidates include Bigtable, local
703    /// filesystem (for development), or fast SSDs. Can be set to the same backend as
704    /// `long_term_storage` for simplicity.
705    ///
706    /// **Note**: Currently, objects up to 1 MiB are stored in this backend, while larger objects
707    /// are stored in the [`long_term_storage`](`Config::long_term_storage`). This is subject to
708    /// change in the future and more configuration options will be added to influence this
709    /// decision.
710    ///
711    /// # Default
712    ///
713    /// Filesystem storage in `./data/high-volume` directory
714    ///
715    /// # Environment Variables
716    ///
717    /// - `OS__HIGH_VOLUME_STORAGE__TYPE` for the backend type. See [`Storage`] for available
718    ///   options.
719    ///
720    /// # Example
721    ///
722    /// ```yaml
723    /// high_volume_storage:
724    ///   type: bigtable
725    ///   project_id: my-project
726    ///   instance_name: objectstore
727    ///   table_name: objectstore
728    /// ```
729    pub high_volume_storage: Storage,
730
731    /// Storage backend for large objects with long-term retention.
732    ///
733    /// This backend is used for larger objects in scenarios with lower throughput and higher
734    /// latency requirements. Good candidates include S3, Google Cloud Storage, or other object
735    /// storage systems. Can be set to the same backend as `high_volume_storage` for simplicity.
736    ///
737    /// **Note**: Currently, objects over 1 MiB are stored in this backend, while smaller objects
738    /// are stored in the [`high_volume_storage`](`Config::high_volume_storage`). This is subject to
739    /// change in the future and more configuration options will be added to influence this
740    /// decision.
741    ///
742    /// # Default
743    ///
744    /// Filesystem storage in `./data/long-term` directory
745    ///
746    /// # Environment Variables
747    ///
748    /// - `OS__LONG_TERM_STORAGE__TYPE` - Backend type (filesystem, s3compatible, gcs, bigtable)
749    /// - Additional fields depending on the type (see [`Storage`])
750    ///
751    /// # Example
752    ///
753    /// ```yaml
754    /// long_term_storage:
755    ///   type: gcs
756    ///   bucket: my-objectstore-bucket
757    /// ```
758    pub long_term_storage: Storage,
759
760    /// Configuration of the internal task runtime.
761    ///
762    /// Controls the thread pool size and behavior of the async runtime powering the server.
763    /// See [`Runtime`] for configuration options.
764    pub runtime: Runtime,
765
766    /// Logging configuration.
767    ///
768    /// Controls log verbosity and output format. See [`Logging`] for configuration options.
769    pub logging: Logging,
770
771    /// Sentry error tracking configuration.
772    ///
773    /// Optional integration with Sentry for error tracking and performance monitoring.
774    /// See [`Sentry`] for configuration options.
775    pub sentry: Sentry,
776
777    /// Internal metrics configuration.
778    ///
779    /// Optional configuration for submitting internal metrics to Datadog. See [`Metrics`] for
780    /// configuration options.
781    pub metrics: Metrics,
782}
783
784impl Default for Config {
785    fn default() -> Self {
786        Self {
787            http_addr: "0.0.0.0:8888".parse().unwrap(),
788
789            high_volume_storage: Storage::FileSystem {
790                path: PathBuf::from("data/high-volume"),
791            },
792            long_term_storage: Storage::FileSystem {
793                path: PathBuf::from("data/long-term"),
794            },
795
796            runtime: Runtime::default(),
797            logging: Logging::default(),
798            sentry: Sentry::default(),
799            metrics: Metrics::default(),
800        }
801    }
802}
803
804impl Config {
805    /// Loads configuration from the provided arguments.
806    ///
807    /// Configuration is merged in the following order (later sources override earlier ones):
808    /// 1. Default values
809    /// 2. YAML configuration file (if provided in `args`)
810    /// 3. Environment variables (prefixed with `OS__`)
811    ///
812    /// # Errors
813    ///
814    /// Returns an error if:
815    /// - The YAML configuration file cannot be read or parsed
816    /// - Environment variables contain invalid values
817    /// - Required fields are missing or invalid
818    pub fn load(path: Option<&Path>) -> Result<Self> {
819        let mut figment = figment::Figment::from(Serialized::defaults(Config::default()));
820        if let Some(path) = path {
821            figment = figment.merge(Yaml::file(path));
822        }
823        let config = figment
824            .merge(Env::prefixed(ENV_PREFIX).split("__"))
825            .extract()?;
826
827        Ok(config)
828    }
829}
830
#[cfg(test)]
mod tests {
    use std::io::Write;

    use secrecy::ExposeSecret;

    use super::*;

    /// All options can be provided through `OS__`-prefixed environment variables alone.
    #[test]
    fn configurable_via_env() {
        figment::Jail::expect_with(|jail| {
            jail.set_env("OS__LONG_TERM_STORAGE__TYPE", "s3compatible");
            jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:8888");
            jail.set_env("OS__LONG_TERM_STORAGE__BUCKET", "whatever");
            jail.set_env("OS__METRICS__TAGS__FOO", "bar");
            jail.set_env("OS__METRICS__TAGS__BAZ", "qux");
            jail.set_env("OS__SENTRY__DSN", "abcde");
            jail.set_env("OS__SENTRY__SAMPLE_RATE", "0.5");
            jail.set_env("OS__SENTRY__ENVIRONMENT", "production");
            jail.set_env("OS__SENTRY__SERVER_NAME", "objectstore-deadbeef");
            jail.set_env("OS__SENTRY__TRACES_SAMPLE_RATE", "0.5");

            let config = Config::load(None).unwrap();

            let Storage::S3Compatible { endpoint, bucket } = &config.long_term_storage else {
                panic!("expected s3 storage");
            };
            assert_eq!(endpoint, "http://localhost:8888");
            assert_eq!(bucket, "whatever");
            assert_eq!(
                config.metrics.tags,
                [("foo".into(), "bar".into()), ("baz".into(), "qux".into())].into()
            );

            assert_eq!(config.sentry.dsn.unwrap().expose_secret().as_str(), "abcde");
            assert_eq!(config.sentry.environment.as_deref(), Some("production"));
            assert_eq!(
                config.sentry.server_name.as_deref(),
                Some("objectstore-deadbeef")
            );
            assert_eq!(config.sentry.sample_rate, 0.5);
            assert_eq!(config.sentry.traces_sample_rate, 0.5);

            Ok(())
        });
    }

    /// Options given in a YAML file override the defaults.
    #[test]
    fn configurable_via_yaml() {
        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
        tempfile
            .write_all(
                br#"
            long_term_storage:
                type: s3compatible
                endpoint: http://localhost:8888
                bucket: whatever
            sentry:
                dsn: abcde
                environment: production
                server_name: objectstore-deadbeef
                sample_rate: 0.5
                traces_sample_rate: 0.5
            "#,
            )
            .unwrap();

        // Run inside a jail so that environment variables of the host cannot interfere.
        figment::Jail::expect_with(|_jail| {
            let config = Config::load(Some(tempfile.path())).unwrap();

            let Storage::S3Compatible { endpoint, bucket } = &config.long_term_storage else {
                panic!("expected s3 storage");
            };
            assert_eq!(endpoint, "http://localhost:8888");
            assert_eq!(bucket, "whatever");

            assert_eq!(config.sentry.dsn.unwrap().expose_secret().as_str(), "abcde");
            assert_eq!(config.sentry.environment.as_deref(), Some("production"));
            assert_eq!(
                config.sentry.server_name.as_deref(),
                Some("objectstore-deadbeef")
            );
            assert_eq!(config.sentry.sample_rate, 0.5);
            assert_eq!(config.sentry.traces_sample_rate, 0.5);

            Ok(())
        });
    }

    /// Environment variables take precedence over values from the YAML file.
    #[test]
    fn configured_with_env_and_yaml() {
        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
        tempfile
            .write_all(
                br#"
            long_term_storage:
                type: s3compatible
                endpoint: http://localhost:8888
                bucket: whatever
            "#,
            )
            .unwrap();

        figment::Jail::expect_with(|jail| {
            jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:9001");

            let config = Config::load(Some(tempfile.path())).unwrap();

            let Storage::S3Compatible { endpoint, .. } = &config.long_term_storage else {
                panic!("expected s3 storage");
            };
            // Env should overwrite the yaml config
            assert_eq!(endpoint, "http://localhost:9001");

            Ok(())
        });
    }
}