objectstore_server/config.rs
1//! Configuration for the objectstore server.
2//!
3//! This module provides the configuration system for the objectstore HTTP server. Configuration can
4//! be loaded from multiple sources with the following precedence (highest to lowest):
5//!
6//! 1. Environment variables (prefixed with `OS__`)
7//! 2. YAML configuration file (specified via `-c` or `--config` flag)
8//! 3. Defaults
9//!
10//! See [`Config`] for a description of all configuration fields and their defaults.
11//!
12//! # Environment Variables
13//!
14//! Environment variables use `OS__` as a prefix and double underscores (`__`) to denote nested
15//! configuration structures. For example:
16//!
17//! - `OS__HTTP_ADDR=0.0.0.0:8888` sets the HTTP server address
18//! - `OS__LONG_TERM_STORAGE__TYPE=filesystem` sets the storage type
19//! - `OS__LONG_TERM_STORAGE__PATH=/data` sets the directory name
20//!
21//! # YAML Configuration File
22//!
23//! Configuration can also be provided via a YAML file. The above configuration in YAML format would
24//! look like this:
25//!
26//! ```yaml
27//! http_addr: 0.0.0.0:8888
28//!
//! long_term_storage:
//!   type: filesystem
//!   path: /data
32//! ```
33
34use std::borrow::Cow;
35use std::collections::{BTreeMap, HashSet};
36use std::fmt;
37use std::net::SocketAddr;
38use std::path::{Path, PathBuf};
39
40use anyhow::Result;
41use figment::providers::{Env, Format, Serialized, Yaml};
42use secrecy::{CloneableSecret, SecretBox, SerializableSecret, zeroize::Zeroize};
43use serde::{Deserialize, Serialize};
44use tracing::level_filters::LevelFilter;
45
46use crate::auth::Permission;
47
48/// Environment variable prefix for all configuration options.
49const ENV_PREFIX: &str = "OS__";
50
51/// Newtype around `String` that may protect against accidental
52/// logging of secrets in our configuration struct. Use with
53/// [`secrecy::SecretBox`].
54#[derive(Clone, Default, Serialize, Deserialize, PartialEq)]
55pub struct ConfigSecret(String);
56
57impl ConfigSecret {
58 pub fn as_str(&self) -> &str {
59 self.0.as_str()
60 }
61}
62
63impl std::ops::Deref for ConfigSecret {
64 type Target = str;
65 fn deref(&self) -> &Self::Target {
66 &self.0
67 }
68}
69
70impl fmt::Debug for ConfigSecret {
71 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
72 write!(f, "[redacted]")
73 }
74}
75
76impl CloneableSecret for ConfigSecret {}
77impl SerializableSecret for ConfigSecret {}
78impl Zeroize for ConfigSecret {
79 fn zeroize(&mut self) {
80 self.0.zeroize();
81 }
82}
83
84/// Storage backend configuration.
85///
86/// The `type` field in YAML or `__TYPE` in environment variables determines which variant is used.
87///
88/// Used in: [`Config::high_volume_storage`], [`Config::long_term_storage`]
89#[derive(Debug, Clone, Deserialize, Serialize)]
90#[serde(tag = "type", rename_all = "lowercase")]
91pub enum Storage {
92 /// Local filesystem storage backend (type `"filesystem"`).
93 ///
94 /// Stores objects as files on the local filesystem. Suitable for development, testing,
95 /// and single-server deployments.
96 ///
97 /// # Example
98 ///
99 /// ```yaml
100 /// long_term_storage:
101 /// type: filesystem
102 /// path: /data
103 /// ```
104 FileSystem {
105 /// Directory path for storing objects.
106 ///
107 /// The directory will be created if it doesn't exist. Relative paths are resolved from
108 /// the server's working directory.
109 ///
110 /// # Default
111 ///
112 /// `"data"` (relative to the server's working directory)
113 ///
114 /// # Environment Variables
115 ///
116 /// - `OS__HIGH_VOLUME_STORAGE__TYPE=filesystem`
117 /// - `OS__HIGH_VOLUME_STORAGE__PATH=/path/to/storage`
118 ///
119 /// Or for long-term storage:
120 /// - `OS__LONG_TERM_STORAGE__TYPE=filesystem`
121 /// - `OS__LONG_TERM_STORAGE__PATH=/path/to/storage`
122 path: PathBuf,
123 },
124
125 /// S3-compatible storage backend (type `"s3compatible"`).
126 ///
127 /// Supports [Amazon S3] and other S3-compatible services. Authentication is handled via
128 /// environment variables (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`) or IAM roles.
129 ///
130 /// [Amazon S3]: https://aws.amazon.com/s3/
131 ///
132 /// # Example
133 ///
134 /// ```yaml
135 /// long_term_storage:
136 /// type: s3compatible
137 /// endpoint: https://s3.amazonaws.com
138 /// bucket: my-bucket
139 /// ```
140 S3Compatible {
141 /// S3 endpoint URL.
142 ///
143 /// Examples: `https://s3.amazonaws.com`, `http://localhost:9000` (for MinIO)
144 ///
145 /// # Environment Variables
146 ///
147 /// - `OS__HIGH_VOLUME_STORAGE__TYPE=s3compatible`
148 /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=https://s3.amazonaws.com`
149 ///
150 /// Or for long-term storage:
151 /// - `OS__LONG_TERM_STORAGE__TYPE=s3compatible`
152 /// - `OS__LONG_TERM_STORAGE__ENDPOINT=https://s3.amazonaws.com`
153 endpoint: String,
154
155 /// S3 bucket name.
156 ///
157 /// The bucket must exist before starting the server.
158 ///
159 /// # Environment Variables
160 ///
161 /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-bucket`
162 /// - `OS__LONG_TERM_STORAGE__BUCKET=my-bucket`
163 bucket: String,
164 },
165
166 /// [Google Cloud Storage] backend (type `"gcs"`).
167 ///
168 /// Stores objects in Google Cloud Storage (GCS). Authentication uses Application Default
169 /// Credentials (ADC), which can be provided via the `GOOGLE_APPLICATION_CREDENTIALS`
170 /// environment variable or GCE/GKE metadata service.
171 ///
172 /// **Note**: The bucket must be pre-created with the following lifecycle policy:
173 /// - `daysSinceCustomTime`: 1 day
174 /// - `action`: delete
175 ///
176 /// [Google Cloud Storage]: https://cloud.google.com/storage
177 ///
178 /// # Example
179 ///
180 /// ```yaml
181 /// long_term_storage:
182 /// type: gcs
183 /// bucket: objectstore-bucket
184 /// ```
185 Gcs {
186 /// Optional custom GCS endpoint URL.
187 ///
188 /// Useful for testing with emulators. If `None`, uses the default GCS endpoint.
189 ///
190 /// # Default
191 ///
192 /// `None` (uses default GCS endpoint)
193 ///
194 /// # Environment Variables
195 ///
196 /// - `OS__HIGH_VOLUME_STORAGE__TYPE=gcs`
197 /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=http://localhost:9000` (optional)
198 ///
199 /// Or for long-term storage:
200 /// - `OS__LONG_TERM_STORAGE__TYPE=gcs`
201 /// - `OS__LONG_TERM_STORAGE__ENDPOINT=http://localhost:9000` (optional)
202 endpoint: Option<String>,
203
204 /// GCS bucket name.
205 ///
206 /// The bucket must exist before starting the server.
207 ///
208 /// # Environment Variables
209 ///
210 /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-gcs-bucket`
211 /// - `OS__LONG_TERM_STORAGE__BUCKET=my-gcs-bucket`
212 bucket: String,
213 },
214
215 /// [Google Bigtable] backend (type `"bigtable"`).
216 ///
217 /// Stores objects in Google Cloud Bigtable, a NoSQL wide-column database. This backend is
218 /// optimized for high-throughput, low-latency workloads with small objects. Authentication uses
219 /// Application Default Credentials (ADC).
220 ///
221 /// **Note**: The table must be pre-created with appropriate column families. Ensure to have the
222 /// following column families:
223 /// - `fg`: timestamp-based garbage collection (`maxage=1s`)
224 /// - `fm`: manual garbage collection (`no GC policy`)
225 ///
226 /// [Google Bigtable]: https://cloud.google.com/bigtable
227 ///
228 /// # Example
229 ///
230 /// ```yaml
231 /// high_volume_storage:
232 /// type: bigtable
233 /// project_id: my-project
234 /// instance_name: objectstore
235 /// table_name: objectstore
236 /// ```
237 BigTable {
238 /// Optional custom Bigtable endpoint.
239 ///
240 /// Useful for testing with emulators. If `None`, uses the default Bigtable endpoint.
241 ///
242 /// # Default
243 ///
244 /// `None` (uses default Bigtable endpoint)
245 ///
246 /// # Environment Variables
247 ///
248 /// - `OS__HIGH_VOLUME_STORAGE__TYPE=bigtable`
249 /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=localhost:8086` (optional)
250 ///
251 /// Or for long-term storage:
252 /// - `OS__LONG_TERM_STORAGE__TYPE=bigtable`
253 /// - `OS__LONG_TERM_STORAGE__ENDPOINT=localhost:8086` (optional)
254 endpoint: Option<String>,
255
256 /// GCP project ID.
257 ///
258 /// The Google project ID (not project number) containing the Bigtable instance.
259 ///
260 /// # Environment Variables
261 ///
262 /// - `OS__HIGH_VOLUME_STORAGE__PROJECT_ID=my-project`
263 /// - `OS__LONG_TERM_STORAGE__PROJECT_ID=my-project`
264 project_id: String,
265
266 /// Bigtable instance name.
267 ///
268 /// # Environment Variables
269 ///
270 /// - `OS__HIGH_VOLUME_STORAGE__INSTANCE_NAME=my-instance`
271 /// - `OS__LONG_TERM_STORAGE__INSTANCE_NAME=my-instance`
272 instance_name: String,
273
274 /// Bigtable table name.
275 ///
276 /// The table must exist before starting the server.
277 ///
278 /// # Environment Variables
279 ///
280 /// - `OS__HIGH_VOLUME_STORAGE__TABLE_NAME=objectstore`
281 /// - `OS__LONG_TERM_STORAGE__TABLE_NAME=objectstore`
282 table_name: String,
283
284 /// Optional number of connections to maintain to Bigtable.
285 ///
286 /// # Default
287 ///
288 /// `None` (infers connection count based on CPU count)
289 ///
290 /// # Environment Variables
291 ///
292 /// - `OS__HIGH_VOLUME_STORAGE__CONNECTIONS=16` (optional)
293 /// - `OS__LONG_TERM_STORAGE__CONNECTIONS=16` (optional)
294 connections: Option<usize>,
295 },
296}
297
298/// Runtime configuration for the Tokio async runtime.
299///
300/// Controls the threading behavior of the server's async runtime.
301///
302/// Used in: [`Config::runtime`]
303#[derive(Debug, Clone, Deserialize, Serialize)]
304#[serde(default)]
305pub struct Runtime {
306 /// Number of worker threads for the server runtime.
307 ///
308 /// This controls the size of the Tokio thread pool used to execute async tasks. More threads
309 /// can improve concurrency for CPU-bound workloads, but too many threads can increase context
310 /// switching overhead.
311 ///
312 /// Set this in accordance with the resources available to the server, especially in Kubernetes
313 /// environments.
314 ///
315 /// # Default
316 ///
317 /// Defaults to the number of CPU cores on the host machine.
318 ///
319 /// # Environment Variable
320 ///
321 /// `OS__RUNTIME__WORKER_THREADS`
322 ///
323 /// # Considerations
324 ///
325 /// - For I/O-bound workloads, the default (number of CPU cores) is usually sufficient
326 /// - For CPU-intensive workloads, consider matching or exceeding the number of cores
327 /// - Setting this too high can lead to increased memory usage and context switching
328 pub worker_threads: usize,
329}
330
331impl Default for Runtime {
332 fn default() -> Self {
333 Self {
334 worker_threads: num_cpus::get(),
335 }
336 }
337}
338
339/// [Sentry](https://sentry.io/) error tracking and performance monitoring configuration.
340///
341/// Configures integration with Sentry for error tracking, performance monitoring, and distributed
342/// tracing. Sentry is disabled by default and only enabled when a DSN is provided.
343///
344/// Used in: [`Config::sentry`]
345#[derive(Debug, Clone, Deserialize, Serialize)]
346pub struct Sentry {
347 /// Sentry DSN (Data Source Name).
348 ///
349 /// When set, enables Sentry error tracking and performance monitoring. When `None`, Sentry
350 /// integration is completely disabled.
351 ///
352 /// # Default
353 ///
354 /// `None` (Sentry disabled)
355 ///
356 /// # Environment Variable
357 ///
358 /// `OS__SENTRY__DSN`
359 pub dsn: Option<SecretBox<ConfigSecret>>,
360
361 /// Environment name for this deployment.
362 ///
363 /// Used to distinguish events from different environments (e.g., "production", "staging",
364 /// "development"). This appears in the Sentry UI and can be used for filtering.
365 ///
366 /// # Default
367 ///
368 /// `None`
369 ///
370 /// # Environment Variable
371 ///
372 /// `OS__SENTRY__ENVIRONMENT`
373 pub environment: Option<Cow<'static, str>>,
374
375 /// Server name or identifier.
376 ///
377 /// Used to identify which server instance sent an event. Useful in multi-server deployments for
378 /// tracking which instance encountered an error. Set to the hostname or pod name of the server.
379 ///
380 /// # Default
381 ///
382 /// `None`
383 ///
384 /// # Environment Variable
385 ///
386 /// `OS__SENTRY__SERVER_NAME`
387 pub server_name: Option<Cow<'static, str>>,
388
389 /// Error event sampling rate.
390 ///
391 /// Controls what percentage of error events are sent to Sentry. A value of `1.0` sends all
392 /// errors, while `0.5` sends 50% of errors, and `0.0` sends no errors.
393 ///
394 /// # Default
395 ///
396 /// `1.0` (send all errors)
397 ///
398 /// # Environment Variable
399 ///
400 /// `OS__SENTRY__SAMPLE_RATE`
401 pub sample_rate: f32,
402
403 /// Performance trace sampling rate.
404 ///
405 /// Controls what percentage of transactions (traces) are sent to Sentry for performance
406 /// monitoring. A value of `1.0` sends all traces, while `0.01` sends 1% of traces.
407 ///
408 /// **Important**: Performance traces can generate significant data volume in high-traffic
409 /// systems. Start with a low rate (0.01-0.1) and adjust based on traffic and Sentry quota.
410 ///
411 /// # Default
412 ///
413 /// `0.01` (send 1% of traces)
414 ///
415 /// # Environment Variable
416 ///
417 /// `OS__SENTRY__TRACES_SAMPLE_RATE`
418 pub traces_sample_rate: f32,
419
420 /// Whether to inherit sampling decisions from incoming traces.
421 ///
422 /// When `true` (default), if an incoming request contains a distributed tracing header with a
423 /// sampling decision (e.g., from an upstream service), that decision is honored. When `false`,
424 /// the local `traces_sample_rate` is always used instead.
425 ///
426 /// When this is enabled, the calling service effectively controls the sampling decision for the
427 /// entire trace. Set this to `false` if you want to have independent sampling control at the
428 /// objectstore level.
429 ///
430 /// # Default
431 ///
432 /// `true`
433 ///
434 /// # Environment Variable
435 ///
436 /// `OS__SENTRY__INHERIT_SAMPLING_DECISION`
437 pub inherit_sampling_decision: bool,
438
439 /// Enable Sentry SDK debug mode.
440 ///
441 /// When enabled, the Sentry SDK will output debug information to stderr, which can be useful
442 /// for troubleshooting Sentry integration issues. It is discouraged to enable this in
443 /// production as it generates verbose logging.
444 ///
445 /// # Default
446 ///
447 /// `false`
448 ///
449 /// # Environment Variable
450 ///
451 /// `OS__SENTRY__DEBUG`
452 pub debug: bool,
453
454 /// Additional tags to attach to all Sentry events.
455 ///
456 /// Key-value pairs that are sent as tags with every event reported to Sentry. Useful for adding
457 /// context such as deployment identifiers or environment details.
458 ///
459 /// # Default
460 ///
461 /// Empty (no tags)
462 ///
463 /// # Environment Variables
464 ///
465 /// Each tag is set individually:
466 /// - `OS__SENTRY__TAGS__FOO=foo`
467 /// - `OS__SENTRY__TAGS__BAR=bar`
468 ///
469 /// # YAML Example
470 ///
471 /// ```yaml
472 /// sentry:
473 /// tags:
474 /// foo: foo
475 /// bar: bar
476 /// ```
477 pub tags: BTreeMap<String, String>,
478}
479
480impl Sentry {
481 /// Returns whether Sentry integration is enabled.
482 ///
483 /// Sentry is considered enabled if a DSN is configured.
484 pub fn is_enabled(&self) -> bool {
485 self.dsn.is_some()
486 }
487}
488
489impl Default for Sentry {
490 fn default() -> Self {
491 Self {
492 dsn: None,
493 environment: None,
494 server_name: None,
495 sample_rate: 1.0,
496 traces_sample_rate: 0.01,
497 inherit_sampling_decision: true,
498 debug: false,
499 tags: BTreeMap::new(),
500 }
501 }
502}
503
504/// Log output format.
505///
506/// Controls how log messages are formatted. The format can be explicitly specified or
507/// auto-detected based on whether output is to a TTY.
508#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Deserialize, Serialize)]
509#[serde(rename_all = "lowercase")]
510pub enum LogFormat {
511 /// Auto detect the best format.
512 ///
513 /// This chooses [`LogFormat::Pretty`] for TTY, otherwise [`LogFormat::Simplified`].
514 Auto,
515
516 /// Pretty printing with colors.
517 ///
518 /// ```text
519 /// INFO objectstore::http > objectstore starting
520 /// ```
521 Pretty,
522
523 /// Simplified plain text output.
524 ///
525 /// ```text
526 /// 2020-12-04T12:10:32Z [objectstore::http] INFO: objectstore starting
527 /// ```
528 Simplified,
529
530 /// Dump out JSON lines.
531 ///
532 /// ```text
533 /// {"timestamp":"2020-12-04T12:11:08.729716Z","level":"INFO","logger":"objectstore::http","message":"objectstore starting","module_path":"objectstore::http","filename":"objectstore_service/src/http.rs","lineno":31}
534 /// ```
535 Json,
536}
537
/// Error returned when a string does not name a known log format.
///
/// Holds the rejected input so that it can be echoed back in the error message.
#[derive(Clone, Debug)]
pub struct FormatParseError(String);

impl fmt::Display for FormatParseError {
    /// Formats the error as the rejected input followed by the list of accepted format names.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(input) = self;
        write!(
            f,
            r#"error parsing "{input}" as format: expected one of "auto", "pretty", "simplified", "json""#
        )
    }
}
551
552impl std::str::FromStr for LogFormat {
553 type Err = FormatParseError;
554
555 fn from_str(s: &str) -> Result<Self, Self::Err> {
556 let result = match s {
557 "" => LogFormat::Auto,
558 s if s.eq_ignore_ascii_case("auto") => LogFormat::Auto,
559 s if s.eq_ignore_ascii_case("pretty") => LogFormat::Pretty,
560 s if s.eq_ignore_ascii_case("simplified") => LogFormat::Simplified,
561 s if s.eq_ignore_ascii_case("json") => LogFormat::Json,
562 s => return Err(FormatParseError(s.into())),
563 };
564
565 Ok(result)
566 }
567}
568
569impl std::error::Error for FormatParseError {}
570
571mod display_fromstr {
572 pub fn serialize<T, S>(value: &T, serializer: S) -> Result<S::Ok, S::Error>
573 where
574 S: serde::Serializer,
575 T: std::fmt::Display,
576 {
577 serializer.collect_str(&value)
578 }
579
580 pub fn deserialize<'de, T, D>(deserializer: D) -> Result<T, D::Error>
581 where
582 D: serde::Deserializer<'de>,
583 T: std::str::FromStr,
584 <T as std::str::FromStr>::Err: std::fmt::Display,
585 {
586 use serde::Deserialize;
587 let s = <std::borrow::Cow<'de, str>>::deserialize(deserializer)?;
588 s.parse().map_err(serde::de::Error::custom)
589 }
590}
591
592/// Logging configuration.
593///
594/// Controls the verbosity and format of log output. Logs are always written to stderr.
595///
596/// Used in: [`Config::logging`]
597#[derive(Debug, Clone, Deserialize, Serialize)]
598pub struct Logging {
599 /// Minimum log level to output.
600 ///
601 /// Controls which log messages are emitted based on their severity. Messages at or above this
602 /// level will be output. Valid levels in increasing severity: TRACE, DEBUG, INFO, WARN, ERROR,
603 /// OFF.
604 ///
605 /// The `RUST_LOG` environment variable provides more granular control per module if needed.
606 ///
607 /// **Important**: Levels `DEBUG` and `TRACE` are very verbose and can impact performance; use
608 /// only for debugging.
609 ///
610 /// # Default
611 ///
612 /// `INFO`
613 ///
614 /// # Environment Variable
615 ///
616 /// `OS__LOGGING__LEVEL`
617 ///
618 /// # Considerations
619 ///
620 /// - `TRACE` and `DEBUG` can be very verbose and impact performance; use only for debugging
621 /// - `INFO` is appropriate for production
622 /// - `WARN` or `ERROR` can be used to reduce log volume in high-traffic systems
623 /// -
624 #[serde(with = "display_fromstr")]
625 pub level: LevelFilter,
626
627 /// Log output format.
628 ///
629 /// Determines how log messages are formatted. See [`LogFormat`] for available options and
630 /// examples.
631 ///
632 /// # Default
633 ///
634 /// `Auto` (pretty for TTY, simplified otherwise)
635 ///
636 /// # Environment Variable
637 ///
638 /// `OS__LOGGING__FORMAT`
639 pub format: LogFormat,
640}
641
642impl Default for Logging {
643 fn default() -> Self {
644 Self {
645 level: LevelFilter::INFO,
646 format: LogFormat::Auto,
647 }
648 }
649}
650
651/// Metrics configuration.
652///
653/// Configures submission of internal metrics to Datadog.
654#[derive(Clone, Debug, Default, Deserialize, Serialize)]
655pub struct Metrics {
656 /// Datadog [API key] for metrics.
657 ///
658 /// When provided, enables metrics reporting to Datadog. Metrics include request counts,
659 /// latencies, storage operations, and more. The key is kept secret and redacted from logs.
660 ///
661 /// # Default
662 ///
663 /// `None` (Datadog metrics disabled)
664 ///
665 /// # Environment Variable
666 ///
667 /// `OS__METRICS__DATADOG_KEY`
668 ///
669 /// [API key]: https://docs.datadoghq.com/account_management/api-app-keys/#api-keys
670 pub datadog_key: Option<SecretBox<ConfigSecret>>,
671
672 /// Global tags applied to all metrics.
673 ///
674 /// Key-value pairs that are attached to every metric sent to Datadog. Useful for
675 /// identifying the environment, region, or other deployment-specific information.
676 ///
677 /// # Default
678 ///
679 /// Empty (no tags)
680 ///
681 /// # Environment Variables
682 ///
683 /// Each tag is set individually:
684 /// - `OS__METRICS__TAGS__FOO=foo`
685 /// - `OS__METRICS__TAGS__BAR=bar`
686 ///
687 /// # YAML Example
688 ///
689 /// ```yaml
690 /// metrics:
691 /// tags:
692 /// foo: foo
693 /// bar: bar
694 /// ```
695 pub tags: BTreeMap<String, String>,
696}
697
698/// A key that may be used to verify a request's `Authorization` header and its
699/// associated permissions. May contain multiple key versions to facilitate rotation.
700#[derive(Debug, Clone, Deserialize, Serialize)]
701pub struct AuthZVerificationKey {
702 /// Versions of this key's key material which may be used to verify signatures.
703 ///
704 /// If a key is being rotated, the old and new versions of that key should both be
705 /// configured so objectstore can verify signatures while the updated key is still
706 /// rolling out. Otherwise, this should only contain the most recent version of a key.
707 pub key_versions: Vec<SecretBox<ConfigSecret>>,
708
709 /// The maximum set of permissions that this key's signer is authorized to grant.
710 ///
711 /// If a request's `Authorization` header grants full permission but it was signed by
712 /// a key that is only allowed to grant read permission, then the request only has
713 /// read permission.
714 #[serde(default)]
715 pub max_permissions: HashSet<Permission>,
716}
717
718/// Configuration for content-based authorization.
719#[derive(Debug, Default, Clone, Deserialize, Serialize)]
720pub struct AuthZ {
721 /// Whether to enforce content-based authorization or not.
722 ///
723 /// If this is set to `false`, checks are still performed but failures will not result
724 /// in `403 Unauthorized` responses.
725 pub enforce: bool,
726
727 /// Keys that may be used to verify a request's `Authorization` header.
728 ///
729 /// This field is a container that is keyed on a key's ID. When verifying a JWT
730 /// from the `Authorization` header, the `kid` field should be read from the JWT
731 /// header and used to index into this map to select the appropriate key.
732 #[serde(default)]
733 pub keys: BTreeMap<String, AuthZVerificationKey>,
734}
735
736/// Main configuration struct for the objectstore server.
737///
738/// This is the top-level configuration that combines all server settings including networking,
739/// storage backends, runtime, and observability options.
740///
741/// Configuration is loaded with the following precedence (highest to lowest):
742/// 1. Environment variables (prefixed with `OS__`)
743/// 2. YAML configuration file (if provided via `-c` flag)
744/// 3. Default values
745///
746/// See individual field documentation for details on each configuration option, including
747/// defaults and environment variables.
748#[derive(Debug, Clone, Deserialize, Serialize)]
749pub struct Config {
750 /// HTTP server bind address.
751 ///
752 /// The socket address (IP and port) where the HTTP server will listen for incoming
753 /// connections. Supports both IPv4 and IPv6 addresses. Note that binding to `0.0.0.0`
754 /// makes the server accessible from all network interfaces.
755 ///
756 /// # Default
757 ///
758 /// `0.0.0.0:8888` (listens on all network interfaces, port 8888)
759 ///
760 /// # Environment Variable
761 ///
762 /// `OS__HTTP_ADDR`
763 pub http_addr: SocketAddr,
764
765 /// Storage backend for high-volume, small objects.
766 ///
767 /// This backend is used for smaller objects in scenarios where high-throughput, low-latency
768 /// access with many small objects is desired. Good candidates include Bigtable, local
769 /// filesystem (for development), or fast SSDs. Can be set to the same backend as
770 /// `long_term_storage` for simplicity.
771 ///
772 /// **Note**: Currently, objects up to 1 MiB are stored in this backend, while larger objects
773 /// are stored in the [`long_term_storage`](`Config::long_term_storage`). This is subject to
774 /// change in the future and more configuration options will be added to influence this
775 /// decision.
776 ///
777 /// # Default
778 ///
779 /// Filesystem storage in `./data/high-volume` directory
780 ///
781 /// # Environment Variables
782 ///
783 /// - `OS__HIGH_VOLUME_STORAGE__TYPE` for the backend type. See [`Storage`] for available
784 /// options.
785 ///
786 /// # Example
787 ///
788 /// ```yaml
789 /// high_volume_storage:
790 /// type: bigtable
791 /// project_id: my-project
792 /// instance_name: objectstore
793 /// table_name: objectstore
794 /// ```
795 pub high_volume_storage: Storage,
796
797 /// Storage backend for large objects with long-term retention.
798 ///
799 /// This backend is used for larger objects in scenarios with lower throughput and higher
800 /// latency requirements. Good candidates include S3, Google Cloud Storage, or other object
801 /// storage systems. Can be set to the same backend as `high_volume_storage` for simplicity.
802 ///
803 /// **Note**: Currently, objects over 1 MiB are stored in this backend, while smaller objects
804 /// are stored in the [`high_volume_storage`](`Config::high_volume_storage`). This is subject to
805 /// change in the future and more configuration options will be added to influence this
806 /// decision.
807 ///
808 /// # Default
809 ///
810 /// Filesystem storage in `./data/long-term` directory
811 ///
812 /// # Environment Variables
813 ///
814 /// - `OS__LONG_TERM_STORAGE__TYPE` - Backend type (filesystem, s3compatible, gcs, bigtable)
815 /// - Additional fields depending on the type (see [`Storage`])
816 ///
817 /// # Example
818 ///
819 /// ```yaml
820 /// long_term_storage:
821 /// type: gcs
822 /// bucket: my-objectstore-bucket
823 /// ```
824 pub long_term_storage: Storage,
825
826 /// Configuration of the internal task runtime.
827 ///
828 /// Controls the thread pool size and behavior of the async runtime powering the server.
829 /// See [`Runtime`] for configuration options.
830 pub runtime: Runtime,
831
832 /// Logging configuration.
833 ///
834 /// Controls log verbosity and output format. See [`Logging`] for configuration options.
835 pub logging: Logging,
836
837 /// Sentry error tracking configuration.
838 ///
839 /// Optional integration with Sentry for error tracking and performance monitoring.
840 /// See [`Sentry`] for configuration options.
841 pub sentry: Sentry,
842
843 /// Internal metrics configuration.
844 ///
845 /// Optional configuration for submitting internal metrics to Datadog. See [`Metrics`] for
846 /// configuration options.
847 pub metrics: Metrics,
848
849 /// Content-based authorization configuration.
850 ///
851 /// Controls the verification and enforcement of content-based access control based on the
852 /// JWT in a request's `Authorization` header.
853 pub auth: AuthZ,
854}
855
856impl Default for Config {
857 fn default() -> Self {
858 Self {
859 http_addr: "0.0.0.0:8888".parse().unwrap(),
860
861 high_volume_storage: Storage::FileSystem {
862 path: PathBuf::from("data/high-volume"),
863 },
864 long_term_storage: Storage::FileSystem {
865 path: PathBuf::from("data/long-term"),
866 },
867
868 runtime: Runtime::default(),
869 logging: Logging::default(),
870 sentry: Sentry::default(),
871 metrics: Metrics::default(),
872 auth: AuthZ::default(),
873 }
874 }
875}
876
877impl Config {
878 /// Loads configuration from the provided arguments.
879 ///
880 /// Configuration is merged in the following order (later sources override earlier ones):
881 /// 1. Default values
882 /// 2. YAML configuration file (if provided in `args`)
883 /// 3. Environment variables (prefixed with `OS__`)
884 ///
885 /// # Errors
886 ///
887 /// Returns an error if:
888 /// - The YAML configuration file cannot be read or parsed
889 /// - Environment variables contain invalid values
890 /// - Required fields are missing or invalid
891 pub fn load(path: Option<&Path>) -> Result<Self> {
892 let mut figment = figment::Figment::from(Serialized::defaults(Config::default()));
893 if let Some(path) = path {
894 figment = figment.merge(Yaml::file(path));
895 }
896 let config = figment
897 .merge(Env::prefixed(ENV_PREFIX).split("__"))
898 .extract()?;
899
900 Ok(config)
901 }
902}
903
#[cfg(test)]
mod tests {
    use std::io::Write;

    use secrecy::ExposeSecret;

    use super::*;

    /// Writes `contents` to a fresh temporary file that serves as the YAML configuration file.
    ///
    /// The returned handle must be kept alive for as long as the file is read.
    fn yaml_file(contents: &[u8]) -> tempfile::NamedTempFile {
        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
        tempfile.write_all(contents).unwrap();
        tempfile
    }

    /// All options can be supplied through `OS__`-prefixed environment variables alone.
    #[test]
    fn configurable_via_env() {
        figment::Jail::expect_with(|jail| {
            jail.set_env("OS__LONG_TERM_STORAGE__TYPE", "s3compatible");
            jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:8888");
            jail.set_env("OS__LONG_TERM_STORAGE__BUCKET", "whatever");
            jail.set_env("OS__METRICS__TAGS__FOO", "bar");
            jail.set_env("OS__METRICS__TAGS__BAZ", "qux");
            jail.set_env("OS__SENTRY__DSN", "abcde");
            jail.set_env("OS__SENTRY__SAMPLE_RATE", "0.5");
            jail.set_env("OS__SENTRY__ENVIRONMENT", "production");
            jail.set_env("OS__SENTRY__SERVER_NAME", "objectstore-deadbeef");
            jail.set_env("OS__SENTRY__TRACES_SAMPLE_RATE", "0.5");

            let config = Config::load(None).unwrap();

            let Storage::S3Compatible { endpoint, bucket } = &config.long_term_storage else {
                panic!("expected s3 storage");
            };
            assert_eq!(endpoint, "http://localhost:8888");
            assert_eq!(bucket, "whatever");
            assert_eq!(
                config.metrics.tags,
                [("foo".into(), "bar".into()), ("baz".into(), "qux".into())].into()
            );

            assert_eq!(config.sentry.dsn.unwrap().expose_secret().as_str(), "abcde");
            assert_eq!(config.sentry.environment.as_deref(), Some("production"));
            assert_eq!(
                config.sentry.server_name.as_deref(),
                Some("objectstore-deadbeef")
            );
            assert_eq!(config.sentry.sample_rate, 0.5);
            assert_eq!(config.sentry.traces_sample_rate, 0.5);

            Ok(())
        });
    }

    /// All options can be supplied through a YAML file alone.
    #[test]
    fn configurable_via_yaml() {
        let tempfile = yaml_file(
            br#"
            long_term_storage:
                type: s3compatible
                endpoint: http://localhost:8888
                bucket: whatever
            sentry:
                dsn: abcde
                environment: production
                server_name: objectstore-deadbeef
                sample_rate: 0.5
                traces_sample_rate: 0.5
            "#,
        );

        figment::Jail::expect_with(|_jail| {
            let config = Config::load(Some(tempfile.path())).unwrap();

            let Storage::S3Compatible { endpoint, bucket } = &config.long_term_storage else {
                panic!("expected s3 storage");
            };
            assert_eq!(endpoint, "http://localhost:8888");
            assert_eq!(bucket, "whatever");

            assert_eq!(config.sentry.dsn.unwrap().expose_secret().as_str(), "abcde");
            assert_eq!(config.sentry.environment.as_deref(), Some("production"));
            assert_eq!(
                config.sentry.server_name.as_deref(),
                Some("objectstore-deadbeef")
            );
            assert_eq!(config.sentry.sample_rate, 0.5);
            assert_eq!(config.sentry.traces_sample_rate, 0.5);

            Ok(())
        });
    }

    /// Environment variables take precedence over values from the YAML file.
    #[test]
    fn configured_with_env_and_yaml() {
        let tempfile = yaml_file(
            br#"
            long_term_storage:
                type: s3compatible
                endpoint: http://localhost:8888
                bucket: whatever
            "#,
        );

        figment::Jail::expect_with(|jail| {
            jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:9001");

            let config = Config::load(Some(tempfile.path())).unwrap();

            let Storage::S3Compatible { endpoint, .. } = &config.long_term_storage else {
                panic!("expected s3 storage");
            };
            // Env should overwrite the yaml config
            assert_eq!(endpoint, "http://localhost:9001");

            Ok(())
        });
    }

    /// The nested auth key map can be supplied as a single structured environment variable.
    #[test]
    fn configure_auth_with_env() {
        figment::Jail::expect_with(|jail| {
            jail.set_env("OS__AUTH__ENFORCE", "true");
            jail.set_env(
                "OS__AUTH__KEYS",
                r#"{kid1={key_versions=["abcde","fghij","this is a test\n multiline string\nend of string\n"],max_permissions=["object.read", "object.write"],}, kid2={key_versions=["12345"],}}"#,
            );

            let config = Config::load(None).unwrap();

            assert!(config.auth.enforce);

            let kid1 = config.auth.keys.get("kid1").unwrap();
            assert_eq!(kid1.key_versions[0].expose_secret().as_str(), "abcde");
            assert_eq!(kid1.key_versions[1].expose_secret().as_str(), "fghij");
            assert_eq!(
                kid1.key_versions[2].expose_secret().as_str(),
                "this is a test\n multiline string\nend of string\n"
            );
            assert_eq!(
                kid1.max_permissions,
                HashSet::from([Permission::ObjectRead, Permission::ObjectWrite])
            );

            let kid2 = config.auth.keys.get("kid2").unwrap();
            assert_eq!(kid2.key_versions[0].expose_secret().as_str(), "12345");
            assert_eq!(kid2.max_permissions, HashSet::new());

            Ok(())
        });
    }

    /// The nested auth key map can be supplied through the YAML file.
    #[test]
    fn configure_auth_with_yaml() {
        // The literal block scalar keeps the extra leading space of its second line and the
        // trailing newline, which the assertion below relies on.
        let tempfile = yaml_file(
            br#"
            auth:
                enforce: true
                keys:
                    kid1:
                        key_versions:
                            - "abcde"
                            - "fghij"
                            - |
                              this is a test
                               multiline string
                              end of string
                        max_permissions:
                            - "object.read"
                            - "object.write"
                    kid2:
                        key_versions:
                            - "12345"
            "#,
        );

        figment::Jail::expect_with(|_jail| {
            let config = Config::load(Some(tempfile.path())).unwrap();

            assert!(config.auth.enforce);

            let kid1 = config.auth.keys.get("kid1").unwrap();
            assert_eq!(kid1.key_versions[0].expose_secret().as_str(), "abcde");
            assert_eq!(kid1.key_versions[1].expose_secret().as_str(), "fghij");
            assert_eq!(
                kid1.key_versions[2].expose_secret().as_str(),
                "this is a test\n multiline string\nend of string\n"
            );
            assert_eq!(
                kid1.max_permissions,
                HashSet::from([Permission::ObjectRead, Permission::ObjectWrite])
            );

            let kid2 = config.auth.keys.get("kid2").unwrap();
            assert_eq!(kid2.key_versions[0].expose_secret().as_str(), "12345");
            assert_eq!(kid2.max_permissions, HashSet::new());

            Ok(())
        });
    }
}