objectstore_server/config.rs
//! Configuration for the objectstore server.
//!
//! This module provides the configuration system for the objectstore HTTP server. Configuration can
//! be loaded from multiple sources with the following precedence (highest to lowest):
//!
//! 1. Environment variables (prefixed with `OS__`)
//! 2. YAML configuration file (specified via the `-c` or `--config` flag)
//! 3. Defaults
//!
//! See [`Config`] for a description of all configuration fields and their defaults.
//!
//! # Environment Variables
//!
//! Environment variables use `OS__` as a prefix and double underscores (`__`) to denote nested
//! configuration structures. For example:
//!
//! - `OS__HTTP_ADDR=0.0.0.0:8888` sets the HTTP server address
//! - `OS__LONG_TERM_STORAGE__TYPE=filesystem` sets the storage type
//! - `OS__LONG_TERM_STORAGE__PATH=/data` sets the storage directory
//!
//! # YAML Configuration File
//!
//! Configuration can also be provided via a YAML file. The above configuration in YAML format would
//! look like this:
//!
//! ```yaml
//! http_addr: 0.0.0.0:8888
//!
//! long_term_storage:
//!   type: filesystem
//!   path: /data
//! ```
33
34use std::borrow::Cow;
35use std::collections::{BTreeMap, HashSet};
36use std::fmt;
37use std::net::SocketAddr;
38use std::path::{Path, PathBuf};
39use std::time::Duration;
40
41use anyhow::Result;
42use figment::providers::{Env, Format, Serialized, Yaml};
43use objectstore_types::auth::Permission;
44use secrecy::{CloneableSecret, SecretBox, SerializableSecret, zeroize::Zeroize};
45use serde::{Deserialize, Serialize};
46use tracing::level_filters::LevelFilter;
47
48use crate::killswitches::Killswitches;
49use crate::rate_limits::RateLimits;
50
/// Prefix shared by every environment variable that configures the server.
///
/// Nested keys are appended after this prefix and separated by `__`.
const ENV_PREFIX: &str = "OS__";
53
54/// Newtype around `String` that may protect against accidental
55/// logging of secrets in our configuration struct. Use with
56/// [`secrecy::SecretBox`].
57#[derive(Clone, Default, Serialize, Deserialize, PartialEq)]
58pub struct ConfigSecret(String);
59
60impl ConfigSecret {
61 /// Returns the secret value as a string slice.
62 pub fn as_str(&self) -> &str {
63 self.0.as_str()
64 }
65}
66
67impl From<&str> for ConfigSecret {
68 fn from(str: &str) -> Self {
69 ConfigSecret(str.to_string())
70 }
71}
72
73impl std::ops::Deref for ConfigSecret {
74 type Target = str;
75 fn deref(&self) -> &Self::Target {
76 &self.0
77 }
78}
79
80impl fmt::Debug for ConfigSecret {
81 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
82 write!(f, "[redacted]")
83 }
84}
85
86impl CloneableSecret for ConfigSecret {}
87impl SerializableSecret for ConfigSecret {}
88impl Zeroize for ConfigSecret {
89 fn zeroize(&mut self) {
90 self.0.zeroize();
91 }
92}
93
94/// Storage backend configuration.
95///
96/// The `type` field in YAML or `__TYPE` in environment variables determines which variant is used.
97///
98/// Used in: [`Config::high_volume_storage`], [`Config::long_term_storage`]
99#[derive(Debug, Deserialize, Serialize)]
100#[serde(tag = "type", rename_all = "lowercase")]
101pub enum Storage {
102 /// Local filesystem storage backend (type `"filesystem"`).
103 ///
104 /// Stores objects as files on the local filesystem. Suitable for development, testing,
105 /// and single-server deployments.
106 ///
107 /// # Example
108 ///
109 /// ```yaml
110 /// long_term_storage:
111 /// type: filesystem
112 /// path: /data
113 /// ```
114 FileSystem {
115 /// Directory path for storing objects.
116 ///
117 /// The directory will be created if it doesn't exist. Relative paths are resolved from
118 /// the server's working directory.
119 ///
120 /// # Default
121 ///
122 /// `"data"` (relative to the server's working directory)
123 ///
124 /// # Environment Variables
125 ///
126 /// - `OS__HIGH_VOLUME_STORAGE__TYPE=filesystem`
127 /// - `OS__HIGH_VOLUME_STORAGE__PATH=/path/to/storage`
128 ///
129 /// Or for long-term storage:
130 /// - `OS__LONG_TERM_STORAGE__TYPE=filesystem`
131 /// - `OS__LONG_TERM_STORAGE__PATH=/path/to/storage`
132 path: PathBuf,
133 },
134
135 /// S3-compatible storage backend (type `"s3compatible"`).
136 ///
137 /// Supports [Amazon S3] and other S3-compatible services. Authentication is handled via
138 /// environment variables (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`) or IAM roles.
139 ///
140 /// [Amazon S3]: https://aws.amazon.com/s3/
141 ///
142 /// # Example
143 ///
144 /// ```yaml
145 /// long_term_storage:
146 /// type: s3compatible
147 /// endpoint: https://s3.amazonaws.com
148 /// bucket: my-bucket
149 /// ```
150 S3Compatible {
151 /// S3 endpoint URL.
152 ///
153 /// Examples: `https://s3.amazonaws.com`, `http://localhost:9000` (for MinIO)
154 ///
155 /// # Environment Variables
156 ///
157 /// - `OS__HIGH_VOLUME_STORAGE__TYPE=s3compatible`
158 /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=https://s3.amazonaws.com`
159 ///
160 /// Or for long-term storage:
161 /// - `OS__LONG_TERM_STORAGE__TYPE=s3compatible`
162 /// - `OS__LONG_TERM_STORAGE__ENDPOINT=https://s3.amazonaws.com`
163 endpoint: String,
164
165 /// S3 bucket name.
166 ///
167 /// The bucket must exist before starting the server.
168 ///
169 /// # Environment Variables
170 ///
171 /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-bucket`
172 /// - `OS__LONG_TERM_STORAGE__BUCKET=my-bucket`
173 bucket: String,
174 },
175
176 /// [Google Cloud Storage] backend (type `"gcs"`).
177 ///
178 /// Stores objects in Google Cloud Storage (GCS). Authentication uses Application Default
179 /// Credentials (ADC), which can be provided via the `GOOGLE_APPLICATION_CREDENTIALS`
180 /// environment variable or GCE/GKE metadata service.
181 ///
182 /// **Note**: The bucket must be pre-created with the following lifecycle policy:
183 /// - `daysSinceCustomTime`: 1 day
184 /// - `action`: delete
185 ///
186 /// [Google Cloud Storage]: https://cloud.google.com/storage
187 ///
188 /// # Example
189 ///
190 /// ```yaml
191 /// long_term_storage:
192 /// type: gcs
193 /// bucket: objectstore-bucket
194 /// ```
195 Gcs {
196 /// Optional custom GCS endpoint URL.
197 ///
198 /// Useful for testing with emulators. If `None`, uses the default GCS endpoint.
199 ///
200 /// # Default
201 ///
202 /// `None` (uses default GCS endpoint)
203 ///
204 /// # Environment Variables
205 ///
206 /// - `OS__HIGH_VOLUME_STORAGE__TYPE=gcs`
207 /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=http://localhost:9000` (optional)
208 ///
209 /// Or for long-term storage:
210 /// - `OS__LONG_TERM_STORAGE__TYPE=gcs`
211 /// - `OS__LONG_TERM_STORAGE__ENDPOINT=http://localhost:9000` (optional)
212 endpoint: Option<String>,
213
214 /// GCS bucket name.
215 ///
216 /// The bucket must exist before starting the server.
217 ///
218 /// # Environment Variables
219 ///
220 /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-gcs-bucket`
221 /// - `OS__LONG_TERM_STORAGE__BUCKET=my-gcs-bucket`
222 bucket: String,
223 },
224
225 /// [Google Bigtable] backend (type `"bigtable"`).
226 ///
227 /// Stores objects in Google Cloud Bigtable, a NoSQL wide-column database. This backend is
228 /// optimized for high-throughput, low-latency workloads with small objects. Authentication uses
229 /// Application Default Credentials (ADC).
230 ///
231 /// **Note**: The table must be pre-created with appropriate column families. Ensure to have the
232 /// following column families:
233 /// - `fg`: timestamp-based garbage collection (`maxage=1s`)
234 /// - `fm`: manual garbage collection (`no GC policy`)
235 ///
236 /// [Google Bigtable]: https://cloud.google.com/bigtable
237 ///
238 /// # Example
239 ///
240 /// ```yaml
241 /// high_volume_storage:
242 /// type: bigtable
243 /// project_id: my-project
244 /// instance_name: objectstore
245 /// table_name: objectstore
246 /// ```
247 BigTable {
248 /// Optional custom Bigtable endpoint.
249 ///
250 /// Useful for testing with emulators. If `None`, uses the default Bigtable endpoint.
251 ///
252 /// # Default
253 ///
254 /// `None` (uses default Bigtable endpoint)
255 ///
256 /// # Environment Variables
257 ///
258 /// - `OS__HIGH_VOLUME_STORAGE__TYPE=bigtable`
259 /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=localhost:8086` (optional)
260 ///
261 /// Or for long-term storage:
262 /// - `OS__LONG_TERM_STORAGE__TYPE=bigtable`
263 /// - `OS__LONG_TERM_STORAGE__ENDPOINT=localhost:8086` (optional)
264 endpoint: Option<String>,
265
266 /// GCP project ID.
267 ///
268 /// The Google project ID (not project number) containing the Bigtable instance.
269 ///
270 /// # Environment Variables
271 ///
272 /// - `OS__HIGH_VOLUME_STORAGE__PROJECT_ID=my-project`
273 /// - `OS__LONG_TERM_STORAGE__PROJECT_ID=my-project`
274 project_id: String,
275
276 /// Bigtable instance name.
277 ///
278 /// # Environment Variables
279 ///
280 /// - `OS__HIGH_VOLUME_STORAGE__INSTANCE_NAME=my-instance`
281 /// - `OS__LONG_TERM_STORAGE__INSTANCE_NAME=my-instance`
282 instance_name: String,
283
284 /// Bigtable table name.
285 ///
286 /// The table must exist before starting the server.
287 ///
288 /// # Environment Variables
289 ///
290 /// - `OS__HIGH_VOLUME_STORAGE__TABLE_NAME=objectstore`
291 /// - `OS__LONG_TERM_STORAGE__TABLE_NAME=objectstore`
292 table_name: String,
293
294 /// Optional number of connections to maintain to Bigtable.
295 ///
296 /// # Default
297 ///
298 /// `None` (defaults to 1)
299 ///
300 /// # Environment Variables
301 ///
302 /// - `OS__HIGH_VOLUME_STORAGE__CONNECTIONS=16` (optional)
303 /// - `OS__LONG_TERM_STORAGE__CONNECTIONS=16` (optional)
304 connections: Option<usize>,
305 },
306}
307
308/// Runtime configuration for the Tokio async runtime.
309///
310/// Controls the threading behavior of the server's async runtime.
311///
312/// Used in: [`Config::runtime`]
313#[derive(Debug, Deserialize, Serialize)]
314#[serde(default)]
315pub struct Runtime {
316 /// Number of worker threads for the server runtime.
317 ///
318 /// This controls the size of the Tokio thread pool used to execute async tasks. More threads
319 /// can improve concurrency for CPU-bound workloads, but too many threads can increase context
320 /// switching overhead.
321 ///
322 /// Set this in accordance with the resources available to the server, especially in Kubernetes
323 /// environments.
324 ///
325 /// # Default
326 ///
327 /// Defaults to the number of CPU cores on the host machine.
328 ///
329 /// # Environment Variable
330 ///
331 /// `OS__RUNTIME__WORKER_THREADS`
332 ///
333 /// # Considerations
334 ///
335 /// - For I/O-bound workloads, the default (number of CPU cores) is usually sufficient
336 /// - For CPU-intensive workloads, consider matching or exceeding the number of cores
337 /// - Setting this too high can lead to increased memory usage and context switching
338 pub worker_threads: usize,
339
340 /// Interval in seconds for reporting internal runtime metrics.
341 ///
342 /// Defaults to `10` seconds.
343 #[serde(with = "humantime_serde")]
344 pub metrics_interval: Duration,
345}
346
347impl Default for Runtime {
348 fn default() -> Self {
349 Self {
350 worker_threads: num_cpus::get(),
351 metrics_interval: Duration::from_secs(10),
352 }
353 }
354}
355
356/// [Sentry](https://sentry.io/) error tracking and performance monitoring configuration.
357///
358/// Configures integration with Sentry for error tracking, performance monitoring, and distributed
359/// tracing. Sentry is disabled by default and only enabled when a DSN is provided.
360///
361/// Used in: [`Config::sentry`]
362#[derive(Debug, Deserialize, Serialize)]
363pub struct Sentry {
364 /// Sentry DSN (Data Source Name).
365 ///
366 /// When set, enables Sentry error tracking and performance monitoring. When `None`, Sentry
367 /// integration is completely disabled.
368 ///
369 /// # Default
370 ///
371 /// `None` (Sentry disabled)
372 ///
373 /// # Environment Variable
374 ///
375 /// `OS__SENTRY__DSN`
376 pub dsn: Option<SecretBox<ConfigSecret>>,
377
378 /// Environment name for this deployment.
379 ///
380 /// Used to distinguish events from different environments (e.g., "production", "staging",
381 /// "development"). This appears in the Sentry UI and can be used for filtering.
382 ///
383 /// # Default
384 ///
385 /// `None`
386 ///
387 /// # Environment Variable
388 ///
389 /// `OS__SENTRY__ENVIRONMENT`
390 pub environment: Option<Cow<'static, str>>,
391
392 /// Server name or identifier.
393 ///
394 /// Used to identify which server instance sent an event. Useful in multi-server deployments for
395 /// tracking which instance encountered an error. Set to the hostname or pod name of the server.
396 ///
397 /// # Default
398 ///
399 /// `None`
400 ///
401 /// # Environment Variable
402 ///
403 /// `OS__SENTRY__SERVER_NAME`
404 pub server_name: Option<Cow<'static, str>>,
405
406 /// Error event sampling rate.
407 ///
408 /// Controls what percentage of error events are sent to Sentry. A value of `1.0` sends all
409 /// errors, while `0.5` sends 50% of errors, and `0.0` sends no errors.
410 ///
411 /// # Default
412 ///
413 /// `1.0` (send all errors)
414 ///
415 /// # Environment Variable
416 ///
417 /// `OS__SENTRY__SAMPLE_RATE`
418 pub sample_rate: f32,
419
420 /// Performance trace sampling rate.
421 ///
422 /// Controls what percentage of transactions (traces) are sent to Sentry for performance
423 /// monitoring. A value of `1.0` sends all traces, while `0.01` sends 1% of traces.
424 ///
425 /// **Important**: Performance traces can generate significant data volume in high-traffic
426 /// systems. Start with a low rate (0.01-0.1) and adjust based on traffic and Sentry quota.
427 ///
428 /// # Default
429 ///
430 /// `0.01` (send 1% of traces)
431 ///
432 /// # Environment Variable
433 ///
434 /// `OS__SENTRY__TRACES_SAMPLE_RATE`
435 pub traces_sample_rate: f32,
436
437 /// Whether to inherit sampling decisions from incoming traces.
438 ///
439 /// When `true` (default), if an incoming request contains a distributed tracing header with a
440 /// sampling decision (e.g., from an upstream service), that decision is honored. When `false`,
441 /// the local `traces_sample_rate` is always used instead.
442 ///
443 /// When this is enabled, the calling service effectively controls the sampling decision for the
444 /// entire trace. Set this to `false` if you want to have independent sampling control at the
445 /// objectstore level.
446 ///
447 /// # Default
448 ///
449 /// `true`
450 ///
451 /// # Environment Variable
452 ///
453 /// `OS__SENTRY__INHERIT_SAMPLING_DECISION`
454 pub inherit_sampling_decision: bool,
455
456 /// Enable Sentry SDK debug mode.
457 ///
458 /// When enabled, the Sentry SDK will output debug information to stderr, which can be useful
459 /// for troubleshooting Sentry integration issues. It is discouraged to enable this in
460 /// production as it generates verbose logging.
461 ///
462 /// # Default
463 ///
464 /// `false`
465 ///
466 /// # Environment Variable
467 ///
468 /// `OS__SENTRY__DEBUG`
469 pub debug: bool,
470
471 /// Additional tags to attach to all Sentry events.
472 ///
473 /// Key-value pairs that are sent as tags with every event reported to Sentry. Useful for adding
474 /// context such as deployment identifiers or environment details.
475 ///
476 /// # Default
477 ///
478 /// Empty (no tags)
479 ///
480 /// # Environment Variables
481 ///
482 /// Each tag is set individually:
483 /// - `OS__SENTRY__TAGS__FOO=foo`
484 /// - `OS__SENTRY__TAGS__BAR=bar`
485 ///
486 /// # YAML Example
487 ///
488 /// ```yaml
489 /// sentry:
490 /// tags:
491 /// foo: foo
492 /// bar: bar
493 /// ```
494 pub tags: BTreeMap<String, String>,
495}
496
497impl Sentry {
498 /// Returns whether Sentry integration is enabled.
499 ///
500 /// Sentry is considered enabled if a DSN is configured.
501 pub fn is_enabled(&self) -> bool {
502 self.dsn.is_some()
503 }
504}
505
506impl Default for Sentry {
507 fn default() -> Self {
508 Self {
509 dsn: None,
510 environment: None,
511 server_name: None,
512 sample_rate: 1.0,
513 traces_sample_rate: 0.01,
514 inherit_sampling_decision: true,
515 debug: false,
516 tags: BTreeMap::new(),
517 }
518 }
519}
520
521/// Log output format.
522///
523/// Controls how log messages are formatted. The format can be explicitly specified or
524/// auto-detected based on whether output is to a TTY.
525#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Deserialize, Serialize)]
526#[serde(rename_all = "lowercase")]
527pub enum LogFormat {
528 /// Auto detect the best format.
529 ///
530 /// This chooses [`LogFormat::Pretty`] for TTY, otherwise [`LogFormat::Simplified`].
531 Auto,
532
533 /// Pretty printing with colors.
534 ///
535 /// ```text
536 /// INFO objectstore::http > objectstore starting
537 /// ```
538 Pretty,
539
540 /// Simplified plain text output.
541 ///
542 /// ```text
543 /// 2020-12-04T12:10:32Z [objectstore::http] INFO: objectstore starting
544 /// ```
545 Simplified,
546
547 /// Dump out JSON lines.
548 ///
549 /// ```text
550 /// {"timestamp":"2020-12-04T12:11:08.729716Z","level":"INFO","logger":"objectstore::http","message":"objectstore starting","module_path":"objectstore::http","filename":"objectstore_service/src/http.rs","lineno":31}
551 /// ```
552 Json,
553}
554
/// Error returned when a string cannot be parsed as a log format.
///
/// Wraps the input that failed to parse so it can be echoed in the error message.
#[derive(Clone, Debug)]
pub struct FormatParseError(String);

impl fmt::Display for FormatParseError {
    /// Writes the rejected input followed by the list of accepted format names.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(input) = self;
        write!(
            f,
            r#"error parsing "{}" as format: expected one of "auto", "pretty", "simplified", "json""#,
            input
        )
    }
}
568
569impl std::str::FromStr for LogFormat {
570 type Err = FormatParseError;
571
572 fn from_str(s: &str) -> Result<Self, Self::Err> {
573 let result = match s {
574 "" => LogFormat::Auto,
575 s if s.eq_ignore_ascii_case("auto") => LogFormat::Auto,
576 s if s.eq_ignore_ascii_case("pretty") => LogFormat::Pretty,
577 s if s.eq_ignore_ascii_case("simplified") => LogFormat::Simplified,
578 s if s.eq_ignore_ascii_case("json") => LogFormat::Json,
579 s => return Err(FormatParseError(s.into())),
580 };
581
582 Ok(result)
583 }
584}
585
586impl std::error::Error for FormatParseError {}
587
588mod display_fromstr {
589 pub fn serialize<T, S>(value: &T, serializer: S) -> Result<S::Ok, S::Error>
590 where
591 S: serde::Serializer,
592 T: std::fmt::Display,
593 {
594 serializer.collect_str(&value)
595 }
596
597 pub fn deserialize<'de, T, D>(deserializer: D) -> Result<T, D::Error>
598 where
599 D: serde::Deserializer<'de>,
600 T: std::str::FromStr,
601 <T as std::str::FromStr>::Err: std::fmt::Display,
602 {
603 use serde::Deserialize;
604 let s = <std::borrow::Cow<'de, str>>::deserialize(deserializer)?;
605 s.parse().map_err(serde::de::Error::custom)
606 }
607}
608
609/// Logging configuration.
610///
611/// Controls the verbosity and format of log output. Logs are always written to stderr.
612///
613/// Used in: [`Config::logging`]
614#[derive(Debug, Deserialize, Serialize)]
615pub struct Logging {
616 /// Minimum log level to output.
617 ///
618 /// Controls which log messages are emitted based on their severity. Messages at or above this
619 /// level will be output. Valid levels in increasing severity: TRACE, DEBUG, INFO, WARN, ERROR,
620 /// OFF.
621 ///
622 /// The `RUST_LOG` environment variable provides more granular control per module if needed.
623 ///
624 /// **Important**: Levels `DEBUG` and `TRACE` are very verbose and can impact performance; use
625 /// only for debugging.
626 ///
627 /// # Default
628 ///
629 /// `INFO`
630 ///
631 /// # Environment Variable
632 ///
633 /// `OS__LOGGING__LEVEL`
634 ///
635 /// # Considerations
636 ///
637 /// - `TRACE` and `DEBUG` can be very verbose and impact performance; use only for debugging
638 /// - `INFO` is appropriate for production
639 /// - `WARN` or `ERROR` can be used to reduce log volume in high-traffic systems
640 /// -
641 #[serde(with = "display_fromstr")]
642 pub level: LevelFilter,
643
644 /// Log output format.
645 ///
646 /// Determines how log messages are formatted. See [`LogFormat`] for available options and
647 /// examples.
648 ///
649 /// # Default
650 ///
651 /// `Auto` (pretty for TTY, simplified otherwise)
652 ///
653 /// # Environment Variable
654 ///
655 /// `OS__LOGGING__FORMAT`
656 pub format: LogFormat,
657}
658
659impl Default for Logging {
660 fn default() -> Self {
661 Self {
662 level: LevelFilter::INFO,
663 format: LogFormat::Auto,
664 }
665 }
666}
667
668// Metrics configuration is defined in `objectstore_metrics::MetricsConfig`.
669
670/// A key that may be used to verify a request's `Authorization` header and its
671/// associated permissions. May contain multiple key versions to facilitate rotation.
672#[derive(Debug, Deserialize, Serialize)]
673pub struct AuthZVerificationKey {
674 /// Files that contain versions of this key's key material which may be used to verify
675 /// signatures.
676 ///
677 /// If a key is being rotated, the old and new versions of that key should both be
678 /// configured so objectstore can verify signatures while the updated key is still
679 /// rolling out. Otherwise, this should only contain the most recent version of a key.
680 pub key_files: Vec<PathBuf>,
681
682 /// The maximum set of permissions that this key's signer is authorized to grant.
683 ///
684 /// If a request's `Authorization` header grants full permission but it was signed by
685 /// a key that is only allowed to grant read permission, then the request only has
686 /// read permission.
687 #[serde(default)]
688 pub max_permissions: HashSet<Permission>,
689}
690
691/// Configuration for content-based authorization.
692#[derive(Debug, Default, Deserialize, Serialize)]
693pub struct AuthZ {
694 /// Whether to enforce content-based authorization or not.
695 ///
696 /// If this is set to `false`, checks are still performed but failures will not result
697 /// in `403 Unauthorized` responses.
698 pub enforce: bool,
699
700 /// Keys that may be used to verify a request's `Authorization` header.
701 ///
702 /// This field is a container that is keyed on a key's ID. When verifying a JWT
703 /// from the `Authorization` header, the `kid` field should be read from the JWT
704 /// header and used to index into this map to select the appropriate key.
705 #[serde(default)]
706 pub keys: BTreeMap<String, AuthZVerificationKey>,
707}
708
709/// Main configuration struct for the objectstore server.
710///
711/// This is the top-level configuration that combines all server settings including networking,
712/// storage backends, runtime, and observability options.
713///
714/// Configuration is loaded with the following precedence (highest to lowest):
715/// 1. Environment variables (prefixed with `OS__`)
716/// 2. YAML configuration file (if provided via `-c` flag)
717/// 3. Default values
718///
719/// See individual field documentation for details on each configuration option, including
720/// defaults and environment variables.
721#[derive(Debug, Deserialize, Serialize)]
722pub struct Config {
723 /// HTTP server bind address.
724 ///
725 /// The socket address (IP and port) where the HTTP server will listen for incoming
726 /// connections. Supports both IPv4 and IPv6 addresses. Note that binding to `0.0.0.0`
727 /// makes the server accessible from all network interfaces.
728 ///
729 /// # Default
730 ///
731 /// `0.0.0.0:8888` (listens on all network interfaces, port 8888)
732 ///
733 /// # Environment Variable
734 ///
735 /// `OS__HTTP_ADDR`
736 pub http_addr: SocketAddr,
737
738 /// Storage backend for high-volume, small objects.
739 ///
740 /// This backend is used for smaller objects in scenarios where high-throughput, low-latency
741 /// access with many small objects is desired. Good candidates include Bigtable, local
742 /// filesystem (for development), or fast SSDs. Can be set to the same backend as
743 /// `long_term_storage` for simplicity.
744 ///
745 /// **Note**: Currently, objects up to 1 MiB are stored in this backend, while larger objects
746 /// are stored in the [`long_term_storage`](`Config::long_term_storage`). This is subject to
747 /// change in the future and more configuration options will be added to influence this
748 /// decision.
749 ///
750 /// # Default
751 ///
752 /// Filesystem storage in `./data/high-volume` directory
753 ///
754 /// # Environment Variables
755 ///
756 /// - `OS__HIGH_VOLUME_STORAGE__TYPE` for the backend type. See [`Storage`] for available
757 /// options.
758 ///
759 /// # Example
760 ///
761 /// ```yaml
762 /// high_volume_storage:
763 /// type: bigtable
764 /// project_id: my-project
765 /// instance_name: objectstore
766 /// table_name: objectstore
767 /// ```
768 pub high_volume_storage: Storage,
769
770 /// Storage backend for large objects with long-term retention.
771 ///
772 /// This backend is used for larger objects in scenarios with lower throughput and higher
773 /// latency requirements. Good candidates include S3, Google Cloud Storage, or other object
774 /// storage systems. Can be set to the same backend as `high_volume_storage` for simplicity.
775 ///
776 /// **Note**: Currently, objects over 1 MiB are stored in this backend, while smaller objects
777 /// are stored in the [`high_volume_storage`](`Config::high_volume_storage`). This is subject to
778 /// change in the future and more configuration options will be added to influence this
779 /// decision.
780 ///
781 /// # Default
782 ///
783 /// Filesystem storage in `./data/long-term` directory
784 ///
785 /// # Environment Variables
786 ///
787 /// - `OS__LONG_TERM_STORAGE__TYPE` - Backend type (filesystem, s3compatible, gcs, bigtable)
788 /// - Additional fields depending on the type (see [`Storage`])
789 ///
790 /// # Example
791 ///
792 /// ```yaml
793 /// long_term_storage:
794 /// type: gcs
795 /// bucket: my-objectstore-bucket
796 /// ```
797 pub long_term_storage: Storage,
798
799 /// Configuration of the internal task runtime.
800 ///
801 /// Controls the thread pool size and behavior of the async runtime powering the server.
802 /// See [`Runtime`] for configuration options.
803 pub runtime: Runtime,
804
805 /// Logging configuration.
806 ///
807 /// Controls log verbosity and output format. See [`Logging`] for configuration options.
808 pub logging: Logging,
809
810 /// Sentry error tracking configuration.
811 ///
812 /// Optional integration with Sentry for error tracking and performance monitoring.
813 /// See [`Sentry`] for configuration options.
814 pub sentry: Sentry,
815
816 /// Internal metrics configuration.
817 ///
818 /// Configures submission of internal metrics to a DogStatsD-compatible endpoint.
819 /// See [`objectstore_metrics::MetricsConfig`] for configuration options.
820 pub metrics: objectstore_metrics::MetricsConfig,
821
822 /// Content-based authorization configuration.
823 ///
824 /// Controls the verification and enforcement of content-based access control based on the
825 /// JWT in a request's `Authorization` header.
826 pub auth: AuthZ,
827
828 /// A list of matchers for requests to discard without processing.
829 pub killswitches: Killswitches,
830
831 /// Definitions for rate limits to enforce on incoming requests.
832 pub rate_limits: RateLimits,
833
834 /// Configuration for the [`StorageService`](objectstore_service::StorageService).
835 pub service: Service,
836
837 /// Configuration for the HTTP layer.
838 ///
839 /// Controls HTTP-level settings that operate before requests reach the
840 /// storage service. See [`Http`] for configuration options.
841 pub http: Http,
842}
843
844/// Configuration for the [`StorageService`](objectstore_service::StorageService).
845///
846/// Controls operational parameters of the storage service layer that sits
847/// between the HTTP server and the storage backends.
848///
849/// Used in: [`Config::service`]
850///
851/// # Environment Variables
852///
853/// - `OS__SERVICE__MAX_CONCURRENCY`
854#[derive(Debug, Deserialize, Serialize)]
855#[serde(default)]
856pub struct Service {
857 /// Maximum number of concurrent backend operations.
858 ///
859 /// This caps the total number of in-flight storage operations (reads,
860 /// writes, deletes) across all requests. Operations that exceed the limit
861 /// are rejected with HTTP 429.
862 ///
863 /// # Default
864 ///
865 /// [`DEFAULT_CONCURRENCY_LIMIT`](objectstore_service::service::DEFAULT_CONCURRENCY_LIMIT)
866 pub max_concurrency: usize,
867}
868
869impl Default for Service {
870 fn default() -> Self {
871 Self {
872 max_concurrency: objectstore_service::service::DEFAULT_CONCURRENCY_LIMIT,
873 }
874 }
875}
876
/// Default cap on the number of HTTP requests that may be in flight at the same time.
///
/// Requests beyond this limit are rejected with HTTP 503.
pub const DEFAULT_MAX_HTTP_REQUESTS: usize = 10_000;
881
882/// Configuration for the HTTP layer.
883///
884/// Controls behaviour at the HTTP request level, before requests reach the
885/// storage service. Grouping these settings separately from [`Service`] keeps
886/// HTTP-layer and service-layer concerns distinct and provides a natural home
887/// for future HTTP-level settings (e.g. timeouts, body size limits).
888///
889/// Used in: [`Config::http`]
890///
891/// # Environment Variables
892///
893/// - `OS__HTTP__MAX_REQUESTS`
894#[derive(Debug, Deserialize, Serialize)]
895#[serde(default)]
896pub struct Http {
897 /// Maximum number of concurrent in-flight HTTP requests.
898 ///
899 /// This is a flood protection limit. When the number of requests currently
900 /// being processed reaches this value, new requests are rejected immediately
901 /// with HTTP 503. Health and readiness endpoints (`/health`, `/ready`) are
902 /// excluded from this limit.
903 ///
904 /// Unlike readiness-based backpressure, direct rejection responds in
905 /// milliseconds and recovers the moment any in-flight request completes.
906 ///
907 /// # Default
908 ///
909 /// [`DEFAULT_MAX_HTTP_REQUESTS`]
910 ///
911 /// # Environment Variable
912 ///
913 /// `OS__HTTP__MAX_REQUESTS`
914 pub max_requests: usize,
915}
916
917impl Default for Http {
918 fn default() -> Self {
919 Self {
920 max_requests: DEFAULT_MAX_HTTP_REQUESTS,
921 }
922 }
923}
924
925impl Default for Config {
926 fn default() -> Self {
927 Self {
928 http_addr: "0.0.0.0:8888".parse().unwrap(),
929
930 high_volume_storage: Storage::FileSystem {
931 path: PathBuf::from("data/high-volume"),
932 },
933 long_term_storage: Storage::FileSystem {
934 path: PathBuf::from("data/long-term"),
935 },
936
937 runtime: Runtime::default(),
938 logging: Logging::default(),
939 sentry: Sentry::default(),
940 metrics: objectstore_metrics::MetricsConfig::default(),
941 auth: AuthZ::default(),
942 killswitches: Killswitches::default(),
943 rate_limits: RateLimits::default(),
944 service: Service::default(),
945 http: Http::default(),
946 }
947 }
948}
949
950impl Config {
951 /// Loads configuration from the provided arguments.
952 ///
953 /// Configuration is merged in the following order (later sources override earlier ones):
954 /// 1. Default values
955 /// 2. YAML configuration file (if provided in `args`)
956 /// 3. Environment variables (prefixed with `OS__`)
957 ///
958 /// # Errors
959 ///
960 /// Returns an error if:
961 /// - The YAML configuration file cannot be read or parsed
962 /// - Environment variables contain invalid values
963 /// - Required fields are missing or invalid
964 pub fn load(path: Option<&Path>) -> Result<Self> {
965 let mut figment = figment::Figment::from(Serialized::defaults(Config::default()));
966 if let Some(path) = path {
967 figment = figment.merge(Yaml::file(path));
968 }
969 let config = figment
970 .merge(Env::prefixed(ENV_PREFIX).split("__"))
971 .extract()?;
972
973 Ok(config)
974 }
975}
976
977#[cfg(test)]
978#[expect(
979 clippy::result_large_err,
980 reason = "figment::Error is inherently large"
981)]
982mod tests {
983 use std::io::Write;
984
985 use secrecy::ExposeSecret;
986
987 use crate::killswitches::Killswitch;
988 use crate::rate_limits::{BandwidthLimits, RateLimits, ThroughputLimits, ThroughputRule};
989
990 use super::*;
991
992 #[test]
993 fn configurable_via_env() {
994 figment::Jail::expect_with(|jail| {
995 jail.set_env("OS__LONG_TERM_STORAGE__TYPE", "s3compatible");
996 jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:8888");
997 jail.set_env("OS__LONG_TERM_STORAGE__BUCKET", "whatever");
998 jail.set_env("OS__METRICS__TAGS__FOO", "bar");
999 jail.set_env("OS__METRICS__TAGS__BAZ", "qux");
1000 jail.set_env("OS__SENTRY__DSN", "abcde");
1001 jail.set_env("OS__SENTRY__SAMPLE_RATE", "0.5");
1002 jail.set_env("OS__SENTRY__ENVIRONMENT", "production");
1003 jail.set_env("OS__SENTRY__SERVER_NAME", "objectstore-deadbeef");
1004 jail.set_env("OS__SENTRY__TRACES_SAMPLE_RATE", "0.5");
1005 jail.set_env("OS__SENTRY__TRACES_SAMPLE_RATE", "0.5");
1006
1007 let config = Config::load(None).unwrap();
1008
1009 let Storage::S3Compatible { endpoint, bucket } = &dbg!(&config).long_term_storage
1010 else {
1011 panic!("expected s3 storage");
1012 };
1013 assert_eq!(endpoint, "http://localhost:8888");
1014 assert_eq!(bucket, "whatever");
1015 assert_eq!(
1016 config.metrics.tags,
1017 [("foo".into(), "bar".into()), ("baz".into(), "qux".into())].into()
1018 );
1019
1020 assert_eq!(config.sentry.dsn.unwrap().expose_secret().as_str(), "abcde");
1021 assert_eq!(config.sentry.environment.as_deref(), Some("production"));
1022 assert_eq!(
1023 config.sentry.server_name.as_deref(),
1024 Some("objectstore-deadbeef")
1025 );
1026 assert_eq!(config.sentry.sample_rate, 0.5);
1027 assert_eq!(config.sentry.traces_sample_rate, 0.5);
1028
1029 Ok(())
1030 });
1031 }
1032
1033 #[test]
1034 fn configurable_via_yaml() {
1035 let mut tempfile = tempfile::NamedTempFile::new().unwrap();
1036 tempfile
1037 .write_all(
1038 br#"
1039 long_term_storage:
1040 type: s3compatible
1041 endpoint: http://localhost:8888
1042 bucket: whatever
1043 sentry:
1044 dsn: abcde
1045 environment: production
1046 server_name: objectstore-deadbeef
1047 sample_rate: 0.5
1048 traces_sample_rate: 0.5
1049 "#,
1050 )
1051 .unwrap();
1052
1053 figment::Jail::expect_with(|_jail| {
1054 let config = Config::load(Some(tempfile.path())).unwrap();
1055
1056 let Storage::S3Compatible { endpoint, bucket } = &dbg!(&config).long_term_storage
1057 else {
1058 panic!("expected s3 storage");
1059 };
1060 assert_eq!(endpoint, "http://localhost:8888");
1061 assert_eq!(bucket, "whatever");
1062
1063 assert_eq!(config.sentry.dsn.unwrap().expose_secret().as_str(), "abcde");
1064 assert_eq!(config.sentry.environment.as_deref(), Some("production"));
1065 assert_eq!(
1066 config.sentry.server_name.as_deref(),
1067 Some("objectstore-deadbeef")
1068 );
1069 assert_eq!(config.sentry.sample_rate, 0.5);
1070 assert_eq!(config.sentry.traces_sample_rate, 0.5);
1071
1072 Ok(())
1073 });
1074 }
1075
1076 #[test]
1077 fn configured_with_env_and_yaml() {
1078 let mut tempfile = tempfile::NamedTempFile::new().unwrap();
1079 tempfile
1080 .write_all(
1081 br#"
1082 long_term_storage:
1083 type: s3compatible
1084 endpoint: http://localhost:8888
1085 bucket: whatever
1086 "#,
1087 )
1088 .unwrap();
1089
1090 figment::Jail::expect_with(|jail| {
1091 jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:9001");
1092
1093 let config = Config::load(Some(tempfile.path())).unwrap();
1094
1095 let Storage::S3Compatible {
1096 endpoint,
1097 bucket: _bucket,
1098 } = &dbg!(&config).long_term_storage
1099 else {
1100 panic!("expected s3 storage");
1101 };
1102 // Env should overwrite the yaml config
1103 assert_eq!(endpoint, "http://localhost:9001");
1104
1105 Ok(())
1106 });
1107 }
1108
1109 #[test]
1110 fn metrics_addr_via_env() {
1111 figment::Jail::expect_with(|jail| {
1112 jail.set_env("OS__METRICS__ADDR", "127.0.0.1:8125");
1113
1114 let config = Config::load(None).unwrap();
1115 assert_eq!(config.metrics.addr.as_deref(), Some("127.0.0.1:8125"));
1116
1117 Ok(())
1118 });
1119 }
1120
1121 #[test]
1122 fn configure_auth_with_env() {
1123 figment::Jail::expect_with(|jail| {
1124 jail.set_env("OS__AUTH__ENFORCE", "true");
1125 jail.set_env(
1126 "OS__AUTH__KEYS",
1127 r#"{kid1={key_files=["abcde","fghij","this is a test\n multiline string\nend of string\n"],max_permissions=["object.read", "object.write"],}, kid2={key_files=["12345"],}}"#,
1128 );
1129
1130 let config = Config::load(None).unwrap();
1131
1132 assert!(config.auth.enforce);
1133
1134 let kid1 = config.auth.keys.get("kid1").unwrap();
1135 assert_eq!(kid1.key_files[0], Path::new("abcde"));
1136 assert_eq!(kid1.key_files[1], Path::new("fghij"));
1137 assert_eq!(
1138 kid1.key_files[2],
1139 Path::new("this is a test\n multiline string\nend of string\n"),
1140 );
1141 assert_eq!(
1142 kid1.max_permissions,
1143 HashSet::from([Permission::ObjectRead, Permission::ObjectWrite])
1144 );
1145
1146 let kid2 = config.auth.keys.get("kid2").unwrap();
1147 assert_eq!(kid2.key_files[0], Path::new("12345"));
1148 assert_eq!(kid2.max_permissions, HashSet::new());
1149
1150 Ok(())
1151 });
1152 }
1153
1154 #[test]
1155 fn configure_auth_with_yaml() {
1156 let mut tempfile = tempfile::NamedTempFile::new().unwrap();
1157 tempfile
1158 .write_all(
1159 br#"
1160 auth:
1161 enforce: true
1162 keys:
1163 kid1:
1164 key_files:
1165 - "abcde"
1166 - "fghij"
1167 - |
1168 this is a test
1169 multiline string
1170 end of string
1171 max_permissions:
1172 - "object.read"
1173 - "object.write"
1174 kid2:
1175 key_files:
1176 - "12345"
1177 "#,
1178 )
1179 .unwrap();
1180
1181 figment::Jail::expect_with(|_jail| {
1182 let config = Config::load(Some(tempfile.path())).unwrap();
1183
1184 assert!(config.auth.enforce);
1185
1186 let kid1 = config.auth.keys.get("kid1").unwrap();
1187 assert_eq!(kid1.key_files[0], Path::new("abcde"));
1188 assert_eq!(kid1.key_files[1], Path::new("fghij"));
1189 assert_eq!(
1190 kid1.key_files[2],
1191 Path::new("this is a test\n multiline string\nend of string\n")
1192 );
1193 assert_eq!(
1194 kid1.max_permissions,
1195 HashSet::from([Permission::ObjectRead, Permission::ObjectWrite])
1196 );
1197
1198 let kid2 = config.auth.keys.get("kid2").unwrap();
1199 assert_eq!(kid2.key_files[0], Path::new("12345"));
1200 assert_eq!(kid2.max_permissions, HashSet::new());
1201
1202 Ok(())
1203 });
1204 }
1205
1206 #[test]
1207 fn configure_killswitches_with_yaml() {
1208 let mut tempfile = tempfile::NamedTempFile::new().unwrap();
1209 tempfile
1210 .write_all(
1211 br#"
1212 killswitches:
1213 - usecase: broken_usecase
1214 - scopes:
1215 org: "42"
1216 - service: "test-*"
1217 - scopes:
1218 org: "42"
1219 project: "4711"
1220 - usecase: attachments
1221 scopes:
1222 org: "42"
1223 service: "test-*"
1224 "#,
1225 )
1226 .unwrap();
1227
1228 figment::Jail::expect_with(|_jail| {
1229 let expected = [
1230 Killswitch {
1231 usecase: Some("broken_usecase".into()),
1232 scopes: BTreeMap::new(),
1233 service: None,
1234 service_matcher: std::sync::OnceLock::new(),
1235 },
1236 Killswitch {
1237 usecase: None,
1238 scopes: BTreeMap::from([("org".into(), "42".into())]),
1239 service: None,
1240 service_matcher: std::sync::OnceLock::new(),
1241 },
1242 Killswitch {
1243 usecase: None,
1244 scopes: BTreeMap::new(),
1245 service: Some("test-*".into()),
1246 service_matcher: std::sync::OnceLock::new(),
1247 },
1248 Killswitch {
1249 usecase: None,
1250 scopes: BTreeMap::from([
1251 ("org".into(), "42".into()),
1252 ("project".into(), "4711".into()),
1253 ]),
1254 service: None,
1255 service_matcher: std::sync::OnceLock::new(),
1256 },
1257 Killswitch {
1258 usecase: Some("attachments".into()),
1259 scopes: BTreeMap::from([("org".into(), "42".into())]),
1260 service: Some("test-*".into()),
1261 service_matcher: std::sync::OnceLock::new(),
1262 },
1263 ];
1264
1265 let config = Config::load(Some(tempfile.path())).unwrap();
1266 assert_eq!(&config.killswitches.0, &expected,);
1267
1268 Ok(())
1269 });
1270 }
1271
1272 #[test]
1273 fn configure_rate_limits_with_yaml() {
1274 let mut tempfile = tempfile::NamedTempFile::new().unwrap();
1275 tempfile
1276 .write_all(
1277 br#"
1278 rate_limits:
1279 throughput:
1280 global_rps: 1000
1281 burst: 100
1282 usecase_pct: 50
1283 scope_pct: 25
1284 rules:
1285 - usecase: "high_priority"
1286 scopes:
1287 - ["org", "123"]
1288 rps: 500
1289 - scopes:
1290 - ["org", "456"]
1291 - ["project", "789"]
1292 pct: 10
1293 bandwidth:
1294 global_bps: 1048576
1295 usecase_pct: 50
1296 scope_pct: 25
1297 "#,
1298 )
1299 .unwrap();
1300
1301 figment::Jail::expect_with(|_jail| {
1302 let expected = RateLimits {
1303 throughput: ThroughputLimits {
1304 global_rps: Some(1000),
1305 burst: 100,
1306 usecase_pct: Some(50),
1307 scope_pct: Some(25),
1308 rules: vec![
1309 ThroughputRule {
1310 usecase: Some("high_priority".to_string()),
1311 scopes: vec![("org".to_string(), "123".to_string())],
1312 rps: Some(500),
1313 pct: None,
1314 },
1315 ThroughputRule {
1316 usecase: None,
1317 scopes: vec![
1318 ("org".to_string(), "456".to_string()),
1319 ("project".to_string(), "789".to_string()),
1320 ],
1321 rps: None,
1322 pct: Some(10),
1323 },
1324 ],
1325 },
1326 bandwidth: BandwidthLimits {
1327 global_bps: Some(1_048_576),
1328 usecase_pct: Some(50),
1329 scope_pct: Some(25),
1330 },
1331 };
1332
1333 let config = Config::load(Some(tempfile.path())).unwrap();
1334 assert_eq!(config.rate_limits, expected);
1335
1336 Ok(())
1337 });
1338 }
1339}