objectstore_server/config.rs
//! Configuration for the objectstore server.
//!
//! This module provides the configuration system for the objectstore HTTP server. Configuration can
//! be loaded from multiple sources with the following precedence (highest to lowest):
//!
//! 1. Environment variables (prefixed with `OS__`)
//! 2. YAML configuration file (specified via `-c` or `--config` flag)
//! 3. Defaults
//!
//! See [`Config`] for a description of all configuration fields and their defaults.
//!
//! # Environment Variables
//!
//! Environment variables use `OS__` as a prefix and double underscores (`__`) to denote nested
//! configuration structures. For example:
//!
//! - `OS__HTTP_ADDR=0.0.0.0:8888` sets the HTTP server address
//! - `OS__LONG_TERM_STORAGE__TYPE=filesystem` sets the storage type
//! - `OS__LONG_TERM_STORAGE__PATH=/data` sets the storage directory
//!
//! # YAML Configuration File
//!
//! Configuration can also be provided via a YAML file. The same configuration in YAML format looks
//! like this:
//!
//! ```yaml
//! http_addr: 0.0.0.0:8888
//!
//! long_term_storage:
//!   type: filesystem
//!   path: /data
//! ```

use std::borrow::Cow;
use std::collections::{BTreeMap, HashSet};
use std::fmt;
use std::net::SocketAddr;
use std::path::{Path, PathBuf};
use std::time::Duration;

use anyhow::Result;
use figment::providers::{Env, Format, Serialized, Yaml};
use objectstore_types::Permission;
use secrecy::{CloneableSecret, SecretBox, SerializableSecret, zeroize::Zeroize};
use serde::{Deserialize, Serialize};
use tracing::level_filters::LevelFilter;

use crate::killswitches::Killswitches;
use crate::rate_limits::RateLimits;

/// Environment variable prefix for all configuration options.
const ENV_PREFIX: &str = "OS__";

/// Newtype around `String` that protects against accidental logging of secrets in our
/// configuration struct. Use with [`secrecy::SecretBox`].
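///
/// A minimal sketch of the redaction behavior (the secret value is illustrative):
///
/// ```ignore
/// let secret = ConfigSecret::from("hunter2");
/// // `Debug` formatting never prints the wrapped value.
/// assert_eq!(format!("{secret:?}"), "[redacted]");
/// ```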
#[derive(Clone, Default, Serialize, Deserialize, PartialEq)]
pub struct ConfigSecret(String);

impl ConfigSecret {
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

impl From<&str> for ConfigSecret {
    fn from(str: &str) -> Self {
        ConfigSecret(str.to_string())
    }
}

impl std::ops::Deref for ConfigSecret {
    type Target = str;
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl fmt::Debug for ConfigSecret {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
        write!(f, "[redacted]")
    }
}

impl CloneableSecret for ConfigSecret {}
impl SerializableSecret for ConfigSecret {}
impl Zeroize for ConfigSecret {
    fn zeroize(&mut self) {
        self.0.zeroize();
    }
}

/// Storage backend configuration.
///
/// The `type` field in YAML or `__TYPE` in environment variables determines which variant is used.
///
/// Used in: [`Config::high_volume_storage`], [`Config::long_term_storage`]
#[derive(Debug, Deserialize, Serialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Storage {
    /// Local filesystem storage backend (type `"filesystem"`).
    ///
    /// Stores objects as files on the local filesystem. Suitable for development, testing,
    /// and single-server deployments.
    ///
    /// # Example
    ///
    /// ```yaml
    /// long_term_storage:
    ///   type: filesystem
    ///   path: /data
    /// ```
    FileSystem {
        /// Directory path for storing objects.
        ///
        /// The directory will be created if it doesn't exist. Relative paths are resolved from
        /// the server's working directory.
        ///
        /// # Default
        ///
        /// `"data"` (relative to the server's working directory)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=filesystem`
        /// - `OS__HIGH_VOLUME_STORAGE__PATH=/path/to/storage`
        ///
        /// Or for long-term storage:
        /// - `OS__LONG_TERM_STORAGE__TYPE=filesystem`
        /// - `OS__LONG_TERM_STORAGE__PATH=/path/to/storage`
        path: PathBuf,
    },

    /// S3-compatible storage backend (type `"s3compatible"`).
    ///
    /// Supports [Amazon S3] and other S3-compatible services. Authentication is handled via
    /// environment variables (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`) or IAM roles.
    ///
    /// [Amazon S3]: https://aws.amazon.com/s3/
    ///
    /// # Example
    ///
    /// ```yaml
    /// long_term_storage:
    ///   type: s3compatible
    ///   endpoint: https://s3.amazonaws.com
    ///   bucket: my-bucket
    /// ```
    S3Compatible {
        /// S3 endpoint URL.
        ///
        /// Examples: `https://s3.amazonaws.com`, `http://localhost:9000` (for MinIO)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=s3compatible`
        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=https://s3.amazonaws.com`
        ///
        /// Or for long-term storage:
        /// - `OS__LONG_TERM_STORAGE__TYPE=s3compatible`
        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=https://s3.amazonaws.com`
        endpoint: String,

        /// S3 bucket name.
        ///
        /// The bucket must exist before starting the server.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-bucket`
        /// - `OS__LONG_TERM_STORAGE__BUCKET=my-bucket`
        bucket: String,
    },

    /// [Google Cloud Storage] backend (type `"gcs"`).
    ///
    /// Stores objects in Google Cloud Storage (GCS). Authentication uses Application Default
    /// Credentials (ADC), which can be provided via the `GOOGLE_APPLICATION_CREDENTIALS`
    /// environment variable or the GCE/GKE metadata service.
    ///
    /// **Note**: The bucket must be pre-created with the following lifecycle policy:
    /// - `daysSinceCustomTime`: 1 day
    /// - `action`: delete
    ///
    /// [Google Cloud Storage]: https://cloud.google.com/storage
    ///
    /// # Example
    ///
    /// ```yaml
    /// long_term_storage:
    ///   type: gcs
    ///   bucket: objectstore-bucket
    /// ```
    Gcs {
        /// Optional custom GCS endpoint URL.
        ///
        /// Useful for testing with emulators. If `None`, uses the default GCS endpoint.
        ///
        /// # Default
        ///
        /// `None` (uses default GCS endpoint)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=gcs`
        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=http://localhost:9000` (optional)
        ///
        /// Or for long-term storage:
        /// - `OS__LONG_TERM_STORAGE__TYPE=gcs`
        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=http://localhost:9000` (optional)
        endpoint: Option<String>,

        /// GCS bucket name.
        ///
        /// The bucket must exist before starting the server.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-gcs-bucket`
        /// - `OS__LONG_TERM_STORAGE__BUCKET=my-gcs-bucket`
        bucket: String,
    },

    /// [Google Bigtable] backend (type `"bigtable"`).
    ///
    /// Stores objects in Google Cloud Bigtable, a NoSQL wide-column database. This backend is
    /// optimized for high-throughput, low-latency workloads with small objects. Authentication uses
    /// Application Default Credentials (ADC).
    ///
    /// **Note**: The table must be pre-created with the following column families:
    /// - `fg`: timestamp-based garbage collection (`maxage=1s`)
    /// - `fm`: manual garbage collection (`no GC policy`)
    ///
    /// [Google Bigtable]: https://cloud.google.com/bigtable
    ///
    /// # Example
    ///
    /// ```yaml
    /// high_volume_storage:
    ///   type: bigtable
    ///   project_id: my-project
    ///   instance_name: objectstore
    ///   table_name: objectstore
    /// ```
    BigTable {
        /// Optional custom Bigtable endpoint.
        ///
        /// Useful for testing with emulators. If `None`, uses the default Bigtable endpoint.
        ///
        /// # Default
        ///
        /// `None` (uses default Bigtable endpoint)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=bigtable`
        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=localhost:8086` (optional)
        ///
        /// Or for long-term storage:
        /// - `OS__LONG_TERM_STORAGE__TYPE=bigtable`
        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=localhost:8086` (optional)
        endpoint: Option<String>,

        /// GCP project ID.
        ///
        /// The Google project ID (not project number) containing the Bigtable instance.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__PROJECT_ID=my-project`
        /// - `OS__LONG_TERM_STORAGE__PROJECT_ID=my-project`
        project_id: String,

        /// Bigtable instance name.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__INSTANCE_NAME=my-instance`
        /// - `OS__LONG_TERM_STORAGE__INSTANCE_NAME=my-instance`
        instance_name: String,

        /// Bigtable table name.
        ///
        /// The table must exist before starting the server.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TABLE_NAME=objectstore`
        /// - `OS__LONG_TERM_STORAGE__TABLE_NAME=objectstore`
        table_name: String,

        /// Optional number of connections to maintain to Bigtable.
        ///
        /// # Default
        ///
        /// `None` (infers connection count based on CPU count)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__CONNECTIONS=16` (optional)
        /// - `OS__LONG_TERM_STORAGE__CONNECTIONS=16` (optional)
        connections: Option<usize>,
    },
}

/// Runtime configuration for the Tokio async runtime.
///
/// Controls the threading behavior of the server's async runtime.
///
/// Used in: [`Config::runtime`]
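///
/// # Example
///
/// A minimal sketch; the values shown are illustrative:
///
/// ```yaml
/// runtime:
///   worker_threads: 8
///   metrics_interval: 10s
/// ```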
#[derive(Debug, Deserialize, Serialize)]
#[serde(default)]
pub struct Runtime {
    /// Number of worker threads for the server runtime.
    ///
    /// This controls the size of the Tokio thread pool used to execute async tasks. More threads
    /// can improve concurrency for CPU-bound workloads, but too many threads can increase context
    /// switching overhead.
    ///
    /// Set this in accordance with the resources available to the server, especially in Kubernetes
    /// environments.
    ///
    /// # Default
    ///
    /// Defaults to the number of CPU cores on the host machine.
    ///
    /// # Environment Variable
    ///
    /// `OS__RUNTIME__WORKER_THREADS`
    ///
    /// # Considerations
    ///
    /// - For I/O-bound workloads, the default (number of CPU cores) is usually sufficient
    /// - For CPU-intensive workloads, consider matching or exceeding the number of cores
    /// - Setting this too high can lead to increased memory usage and context switching
    pub worker_threads: usize,

    /// Interval for reporting internal runtime metrics.
    ///
    /// Accepts humantime-style durations such as `10s` or `1min`.
    ///
    /// Defaults to `10` seconds.
    #[serde(with = "humantime_serde")]
    pub metrics_interval: Duration,
}

impl Default for Runtime {
    fn default() -> Self {
        Self {
            worker_threads: num_cpus::get(),
            metrics_interval: Duration::from_secs(10),
        }
    }
}

/// [Sentry](https://sentry.io/) error tracking and performance monitoring configuration.
///
/// Configures integration with Sentry for error tracking, performance monitoring, and distributed
/// tracing. Sentry is disabled by default and only enabled when a DSN is provided.
///
/// Used in: [`Config::sentry`]
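///
/// # Example
///
/// A minimal sketch; the DSN shown is a placeholder:
///
/// ```yaml
/// sentry:
///   dsn: https://examplePublicKey@o0.ingest.sentry.io/0
///   environment: production
///   traces_sample_rate: 0.05
/// ```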
#[derive(Debug, Deserialize, Serialize)]
pub struct Sentry {
    /// Sentry DSN (Data Source Name).
    ///
    /// When set, enables Sentry error tracking and performance monitoring. When `None`, Sentry
    /// integration is completely disabled.
    ///
    /// # Default
    ///
    /// `None` (Sentry disabled)
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__DSN`
    pub dsn: Option<SecretBox<ConfigSecret>>,

    /// Environment name for this deployment.
    ///
    /// Used to distinguish events from different environments (e.g., "production", "staging",
    /// "development"). This appears in the Sentry UI and can be used for filtering.
    ///
    /// # Default
    ///
    /// `None`
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__ENVIRONMENT`
    pub environment: Option<Cow<'static, str>>,

    /// Server name or identifier.
    ///
    /// Used to identify which server instance sent an event. Useful in multi-server deployments for
    /// tracking which instance encountered an error. Set to the hostname or pod name of the server.
    ///
    /// # Default
    ///
    /// `None`
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__SERVER_NAME`
    pub server_name: Option<Cow<'static, str>>,

    /// Error event sampling rate.
    ///
    /// Controls what percentage of error events are sent to Sentry. A value of `1.0` sends all
    /// errors, while `0.5` sends 50% of errors, and `0.0` sends no errors.
    ///
    /// # Default
    ///
    /// `1.0` (send all errors)
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__SAMPLE_RATE`
    pub sample_rate: f32,

    /// Performance trace sampling rate.
    ///
    /// Controls what percentage of transactions (traces) are sent to Sentry for performance
    /// monitoring. A value of `1.0` sends all traces, while `0.01` sends 1% of traces.
    ///
    /// **Important**: Performance traces can generate significant data volume in high-traffic
    /// systems. Start with a low rate (0.01-0.1) and adjust based on traffic and Sentry quota.
    ///
    /// # Default
    ///
    /// `0.01` (send 1% of traces)
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__TRACES_SAMPLE_RATE`
    pub traces_sample_rate: f32,

    /// Whether to inherit sampling decisions from incoming traces.
    ///
    /// When `true` (default), if an incoming request contains a distributed tracing header with a
    /// sampling decision (e.g., from an upstream service), that decision is honored. When `false`,
    /// the local `traces_sample_rate` is always used instead.
    ///
    /// When this is enabled, the calling service effectively controls the sampling decision for the
    /// entire trace. Set this to `false` if you want independent sampling control at the
    /// objectstore level.
    ///
    /// # Default
    ///
    /// `true`
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__INHERIT_SAMPLING_DECISION`
    pub inherit_sampling_decision: bool,

    /// Enable Sentry SDK debug mode.
    ///
    /// When enabled, the Sentry SDK will output debug information to stderr, which can be useful
    /// for troubleshooting Sentry integration issues. Enabling this in production is discouraged,
    /// as it generates verbose logging.
    ///
    /// # Default
    ///
    /// `false`
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__DEBUG`
    pub debug: bool,

    /// Additional tags to attach to all Sentry events.
    ///
    /// Key-value pairs that are sent as tags with every event reported to Sentry. Useful for adding
    /// context such as deployment identifiers or environment details.
    ///
    /// # Default
    ///
    /// Empty (no tags)
    ///
    /// # Environment Variables
    ///
    /// Each tag is set individually:
    /// - `OS__SENTRY__TAGS__FOO=foo`
    /// - `OS__SENTRY__TAGS__BAR=bar`
    ///
    /// # YAML Example
    ///
    /// ```yaml
    /// sentry:
    ///   tags:
    ///     foo: foo
    ///     bar: bar
    /// ```
    pub tags: BTreeMap<String, String>,
}

impl Sentry {
    /// Returns whether Sentry integration is enabled.
    ///
    /// Sentry is considered enabled if a DSN is configured.
    pub fn is_enabled(&self) -> bool {
        self.dsn.is_some()
    }
}

impl Default for Sentry {
    fn default() -> Self {
        Self {
            dsn: None,
            environment: None,
            server_name: None,
            sample_rate: 1.0,
            traces_sample_rate: 0.01,
            inherit_sampling_decision: true,
            debug: false,
            tags: BTreeMap::new(),
        }
    }
}

/// Log output format.
///
/// Controls how log messages are formatted. The format can be explicitly specified or
/// auto-detected based on whether output is to a TTY.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum LogFormat {
    /// Auto detect the best format.
    ///
    /// This chooses [`LogFormat::Pretty`] for TTY, otherwise [`LogFormat::Simplified`].
    Auto,

    /// Pretty printing with colors.
    ///
    /// ```text
    /// INFO objectstore::http > objectstore starting
    /// ```
    Pretty,

    /// Simplified plain text output.
    ///
    /// ```text
    /// 2020-12-04T12:10:32Z [objectstore::http] INFO: objectstore starting
    /// ```
    Simplified,

    /// Dump out JSON lines.
    ///
    /// ```text
    /// {"timestamp":"2020-12-04T12:11:08.729716Z","level":"INFO","logger":"objectstore::http","message":"objectstore starting","module_path":"objectstore::http","filename":"objectstore_service/src/http.rs","lineno":31}
    /// ```
    Json,
}

/// The logging format parse error.
#[derive(Clone, Debug)]
pub struct FormatParseError(String);

impl fmt::Display for FormatParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            r#"error parsing "{}" as format: expected one of "auto", "pretty", "simplified", "json""#,
            self.0
        )
    }
}

impl std::str::FromStr for LogFormat {
    type Err = FormatParseError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let result = match s {
            "" => LogFormat::Auto,
            s if s.eq_ignore_ascii_case("auto") => LogFormat::Auto,
            s if s.eq_ignore_ascii_case("pretty") => LogFormat::Pretty,
            s if s.eq_ignore_ascii_case("simplified") => LogFormat::Simplified,
            s if s.eq_ignore_ascii_case("json") => LogFormat::Json,
            s => return Err(FormatParseError(s.into())),
        };

        Ok(result)
    }
}

impl std::error::Error for FormatParseError {}

/// Serde helpers that serialize via `Display` and deserialize via `FromStr`.
///
/// Used for config fields (such as the log [`LevelFilter`]) that are represented as strings.
mod display_fromstr {
    pub fn serialize<T, S>(value: &T, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
        T: std::fmt::Display,
    {
        serializer.collect_str(&value)
    }

    pub fn deserialize<'de, T, D>(deserializer: D) -> Result<T, D::Error>
    where
        D: serde::Deserializer<'de>,
        T: std::str::FromStr,
        <T as std::str::FromStr>::Err: std::fmt::Display,
    {
        use serde::Deserialize;
        let s = <std::borrow::Cow<'de, str>>::deserialize(deserializer)?;
        s.parse().map_err(serde::de::Error::custom)
    }
}

/// Logging configuration.
///
/// Controls the verbosity and format of log output. Logs are always written to stderr.
///
/// Used in: [`Config::logging`]
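///
/// # Example
///
/// A minimal sketch; the values shown are illustrative:
///
/// ```yaml
/// logging:
///   level: DEBUG
///   format: json
/// ```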
#[derive(Debug, Deserialize, Serialize)]
pub struct Logging {
    /// Minimum log level to output.
    ///
    /// Controls which log messages are emitted based on their severity. Messages at or above this
    /// level will be output. Valid levels in increasing severity: TRACE, DEBUG, INFO, WARN, ERROR,
    /// OFF.
    ///
    /// The `RUST_LOG` environment variable provides more granular control per module if needed.
    ///
    /// **Important**: Levels `DEBUG` and `TRACE` are very verbose and can impact performance; use
    /// them only for debugging.
    ///
    /// # Default
    ///
    /// `INFO`
    ///
    /// # Environment Variable
    ///
    /// `OS__LOGGING__LEVEL`
    ///
    /// # Considerations
    ///
    /// - `INFO` is appropriate for production
    /// - `WARN` or `ERROR` can be used to reduce log volume in high-traffic systems
    #[serde(with = "display_fromstr")]
    pub level: LevelFilter,

    /// Log output format.
    ///
    /// Determines how log messages are formatted. See [`LogFormat`] for available options and
    /// examples.
    ///
    /// # Default
    ///
    /// `Auto` (pretty for TTY, simplified otherwise)
    ///
    /// # Environment Variable
    ///
    /// `OS__LOGGING__FORMAT`
    pub format: LogFormat,
}

impl Default for Logging {
    fn default() -> Self {
        Self {
            level: LevelFilter::INFO,
            format: LogFormat::Auto,
        }
    }
}

/// Metrics configuration.
///
/// Configures submission of internal metrics to Datadog.
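///
/// # Example
///
/// A minimal sketch; the key shown is a placeholder and the tag is illustrative:
///
/// ```yaml
/// metrics:
///   datadog_key: 0123456789abcdef0123456789abcdef
///   tags:
///     region: us-east-1
/// ```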
#[derive(Debug, Default, Deserialize, Serialize)]
pub struct Metrics {
    /// Datadog [API key] for metrics.
    ///
    /// When provided, enables metrics reporting to Datadog. Metrics include request counts,
    /// latencies, storage operations, and more. The key is kept secret and redacted from logs.
    ///
    /// # Default
    ///
    /// `None` (Datadog metrics disabled)
    ///
    /// # Environment Variable
    ///
    /// `OS__METRICS__DATADOG_KEY`
    ///
    /// [API key]: https://docs.datadoghq.com/account_management/api-app-keys/#api-keys
    pub datadog_key: Option<SecretBox<ConfigSecret>>,

    /// Global tags applied to all metrics.
    ///
    /// Key-value pairs that are attached to every metric sent to Datadog. Useful for
    /// identifying the environment, region, or other deployment-specific information.
    ///
    /// # Default
    ///
    /// Empty (no tags)
    ///
    /// # Environment Variables
    ///
    /// Each tag is set individually:
    /// - `OS__METRICS__TAGS__FOO=foo`
    /// - `OS__METRICS__TAGS__BAR=bar`
    ///
    /// # YAML Example
    ///
    /// ```yaml
    /// metrics:
    ///   tags:
    ///     foo: foo
    ///     bar: bar
    /// ```
    pub tags: BTreeMap<String, String>,
}

/// A key that may be used to verify a request's `Authorization` header and its
/// associated permissions. May contain multiple key versions to facilitate rotation.
#[derive(Debug, Deserialize, Serialize)]
pub struct AuthZVerificationKey {
    /// Files that contain versions of this key's key material which may be used to verify
    /// signatures.
    ///
    /// If a key is being rotated, the old and new versions of that key should both be
    /// configured so objectstore can verify signatures while the updated key is still
    /// rolling out. Otherwise, this should only contain the most recent version of a key.
    pub key_files: Vec<PathBuf>,

    /// The maximum set of permissions that this key's signer is authorized to grant.
    ///
    /// If a request's `Authorization` header grants full permission but it was signed by
    /// a key that is only allowed to grant read permission, then the request only has
    /// read permission.
    #[serde(default)]
    pub max_permissions: HashSet<Permission>,
}

/// Configuration for content-based authorization.
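///
/// # Example
///
/// A sketch based on the auth tests below; the key ID and file path are illustrative:
///
/// ```yaml
/// auth:
///   enforce: true
///   keys:
///     my-key-id:
///       key_files:
///         - /etc/objectstore/keys/my-key-id.pub
///       max_permissions:
///         - object.read
/// ```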
#[derive(Debug, Default, Deserialize, Serialize)]
pub struct AuthZ {
    /// Whether to enforce content-based authorization or not.
    ///
    /// If this is set to `false`, checks are still performed, but failures will not result
    /// in `403 Forbidden` responses.
    pub enforce: bool,

    /// Keys that may be used to verify a request's `Authorization` header.
    ///
    /// This field is a container that is keyed on a key's ID. When verifying a JWT
    /// from the `Authorization` header, the `kid` field should be read from the JWT
    /// header and used to index into this map to select the appropriate key.
    #[serde(default)]
    pub keys: BTreeMap<String, AuthZVerificationKey>,
}

/// Main configuration struct for the objectstore server.
///
/// This is the top-level configuration that combines all server settings including networking,
/// storage backends, runtime, and observability options.
///
/// Configuration is loaded with the following precedence (highest to lowest):
/// 1. Environment variables (prefixed with `OS__`)
/// 2. YAML configuration file (if provided via `-c` flag)
/// 3. Default values
///
/// See individual field documentation for details on each configuration option, including
/// defaults and environment variables.
#[derive(Debug, Deserialize, Serialize)]
pub struct Config {
    /// HTTP server bind address.
    ///
    /// The socket address (IP and port) where the HTTP server will listen for incoming
    /// connections. Supports both IPv4 and IPv6 addresses. Note that binding to `0.0.0.0`
    /// makes the server accessible from all network interfaces.
    ///
    /// # Default
    ///
    /// `0.0.0.0:8888` (listens on all network interfaces, port 8888)
    ///
    /// # Environment Variable
    ///
    /// `OS__HTTP_ADDR`
    pub http_addr: SocketAddr,

    /// Storage backend for high-volume, small objects.
    ///
    /// This backend is used for smaller objects in scenarios where high-throughput, low-latency
    /// access with many small objects is desired. Good candidates include Bigtable, local
    /// filesystem (for development), or fast SSDs. Can be set to the same backend as
    /// `long_term_storage` for simplicity.
    ///
    /// **Note**: Currently, objects up to 1 MiB are stored in this backend, while larger objects
    /// are stored in the [`long_term_storage`](`Config::long_term_storage`). This is subject to
    /// change in the future and more configuration options will be added to influence this
    /// decision.
    ///
    /// # Default
    ///
    /// Filesystem storage in the `./data/high-volume` directory
    ///
    /// # Environment Variables
    ///
    /// - `OS__HIGH_VOLUME_STORAGE__TYPE` for the backend type. See [`Storage`] for available
    ///   options.
    ///
    /// # Example
    ///
    /// ```yaml
    /// high_volume_storage:
    ///   type: bigtable
    ///   project_id: my-project
    ///   instance_name: objectstore
    ///   table_name: objectstore
    /// ```
    pub high_volume_storage: Storage,

    /// Storage backend for large objects with long-term retention.
    ///
    /// This backend is used for larger objects in scenarios with lower throughput and higher
    /// latency requirements. Good candidates include S3, Google Cloud Storage, or other object
    /// storage systems. Can be set to the same backend as `high_volume_storage` for simplicity.
    ///
    /// **Note**: Currently, objects over 1 MiB are stored in this backend, while smaller objects
    /// are stored in the [`high_volume_storage`](`Config::high_volume_storage`). This is subject to
    /// change in the future and more configuration options will be added to influence this
    /// decision.
    ///
    /// # Default
    ///
    /// Filesystem storage in the `./data/long-term` directory
    ///
    /// # Environment Variables
    ///
    /// - `OS__LONG_TERM_STORAGE__TYPE` - Backend type (filesystem, s3compatible, gcs, bigtable)
    /// - Additional fields depending on the type (see [`Storage`])
    ///
    /// # Example
    ///
    /// ```yaml
    /// long_term_storage:
    ///   type: gcs
    ///   bucket: my-objectstore-bucket
    /// ```
    pub long_term_storage: Storage,

    /// Configuration of the internal task runtime.
    ///
    /// Controls the thread pool size and behavior of the async runtime powering the server.
    /// See [`Runtime`] for configuration options.
    pub runtime: Runtime,

    /// Logging configuration.
    ///
    /// Controls log verbosity and output format. See [`Logging`] for configuration options.
    pub logging: Logging,

    /// Sentry error tracking configuration.
    ///
    /// Optional integration with Sentry for error tracking and performance monitoring.
    /// See [`Sentry`] for configuration options.
    pub sentry: Sentry,

    /// Internal metrics configuration.
    ///
    /// Optional configuration for submitting internal metrics to Datadog. See [`Metrics`] for
    /// configuration options.
    pub metrics: Metrics,

    /// Content-based authorization configuration.
    ///
    /// Controls the verification and enforcement of content-based access control based on the
    /// JWT in a request's `Authorization` header.
    pub auth: AuthZ,

    /// A list of matchers for requests to discard without processing.
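    ///
    /// # Example
    ///
    /// A sketch based on the killswitch tests below; the usecase and scope values are illustrative:
    ///
    /// ```yaml
    /// killswitches:
    ///   - usecase: attachments
    ///     scopes:
    ///       org: "42"
    /// ```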
    pub killswitches: Killswitches,

    /// Definitions for rate limits to enforce on incoming requests.
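    ///
    /// # Example
    ///
    /// A sketch based on the rate limit tests below; the values are illustrative:
    ///
    /// ```yaml
    /// rate_limits:
    ///   throughput:
    ///     global_rps: 1000
    ///     burst: 100
    /// ```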
    pub rate_limits: RateLimits,
}

impl Default for Config {
    fn default() -> Self {
        Self {
            http_addr: "0.0.0.0:8888".parse().unwrap(),

            high_volume_storage: Storage::FileSystem {
                path: PathBuf::from("data/high-volume"),
            },
            long_term_storage: Storage::FileSystem {
                path: PathBuf::from("data/long-term"),
            },

            runtime: Runtime::default(),
            logging: Logging::default(),
            sentry: Sentry::default(),
            metrics: Metrics::default(),
            auth: AuthZ::default(),
            killswitches: Killswitches::default(),
            rate_limits: RateLimits::default(),
        }
    }
}

impl Config {
    /// Loads the configuration, optionally merging in the YAML file at `path`.
    ///
    /// Configuration is merged in the following order (later sources override earlier ones):
    /// 1. Default values
    /// 2. YAML configuration file (if a path is provided)
    /// 3. Environment variables (prefixed with `OS__`)
    ///
    /// # Errors
    ///
    /// Returns an error if:
    /// - The YAML configuration file cannot be read or parsed
    /// - Environment variables contain invalid values
    /// - Required fields are missing or invalid
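    ///
    /// # Example
    ///
    /// A minimal sketch (the config file path is illustrative):
    ///
    /// ```ignore
    /// use std::path::Path;
    ///
    /// // Defaults, overridden by `config.yml` (if given) and then by `OS__*` env vars.
    /// let config = Config::load(Some(Path::new("config.yml"))).expect("invalid configuration");
    /// println!("listening on {}", config.http_addr);
    /// ```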
    pub fn load(path: Option<&Path>) -> Result<Self> {
        let mut figment = figment::Figment::from(Serialized::defaults(Config::default()));
        if let Some(path) = path {
            figment = figment.merge(Yaml::file(path));
        }
        let config = figment
            .merge(Env::prefixed(ENV_PREFIX).split("__"))
            .extract()?;

        Ok(config)
    }
}

#[cfg(test)]
mod tests {
    use std::io::Write;

    use secrecy::ExposeSecret;

    use crate::killswitches::Killswitch;
    use crate::rate_limits::{BandwidthLimits, RateLimits, ThroughputLimits, ThroughputRule};

    use super::*;

    #[test]
    fn configurable_via_env() {
        figment::Jail::expect_with(|jail| {
            jail.set_env("OS__LONG_TERM_STORAGE__TYPE", "s3compatible");
            jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:8888");
            jail.set_env("OS__LONG_TERM_STORAGE__BUCKET", "whatever");
            jail.set_env("OS__METRICS__TAGS__FOO", "bar");
            jail.set_env("OS__METRICS__TAGS__BAZ", "qux");
            jail.set_env("OS__SENTRY__DSN", "abcde");
            jail.set_env("OS__SENTRY__SAMPLE_RATE", "0.5");
            jail.set_env("OS__SENTRY__ENVIRONMENT", "production");
            jail.set_env("OS__SENTRY__SERVER_NAME", "objectstore-deadbeef");
            jail.set_env("OS__SENTRY__TRACES_SAMPLE_RATE", "0.5");

            let config = Config::load(None).unwrap();

            let Storage::S3Compatible { endpoint, bucket } = &dbg!(&config).long_term_storage
            else {
                panic!("expected s3 storage");
            };
            assert_eq!(endpoint, "http://localhost:8888");
            assert_eq!(bucket, "whatever");
            assert_eq!(
                config.metrics.tags,
                [("foo".into(), "bar".into()), ("baz".into(), "qux".into())].into()
            );

            assert_eq!(config.sentry.dsn.unwrap().expose_secret().as_str(), "abcde");
            assert_eq!(config.sentry.environment.as_deref(), Some("production"));
            assert_eq!(
                config.sentry.server_name.as_deref(),
                Some("objectstore-deadbeef")
            );
            assert_eq!(config.sentry.sample_rate, 0.5);
            assert_eq!(config.sentry.traces_sample_rate, 0.5);

            Ok(())
        });
    }

    #[test]
    fn configurable_via_yaml() {
        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
        tempfile
            .write_all(
                br#"
                long_term_storage:
                  type: s3compatible
                  endpoint: http://localhost:8888
                  bucket: whatever
                sentry:
                  dsn: abcde
                  environment: production
                  server_name: objectstore-deadbeef
                  sample_rate: 0.5
                  traces_sample_rate: 0.5
                "#,
            )
            .unwrap();

        figment::Jail::expect_with(|_jail| {
            let config = Config::load(Some(tempfile.path())).unwrap();

            let Storage::S3Compatible { endpoint, bucket } = &dbg!(&config).long_term_storage
            else {
                panic!("expected s3 storage");
            };
            assert_eq!(endpoint, "http://localhost:8888");
            assert_eq!(bucket, "whatever");

            assert_eq!(config.sentry.dsn.unwrap().expose_secret().as_str(), "abcde");
            assert_eq!(config.sentry.environment.as_deref(), Some("production"));
            assert_eq!(
                config.sentry.server_name.as_deref(),
                Some("objectstore-deadbeef")
            );
            assert_eq!(config.sentry.sample_rate, 0.5);
            assert_eq!(config.sentry.traces_sample_rate, 0.5);

            Ok(())
        });
    }

    #[test]
    fn configured_with_env_and_yaml() {
        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
        tempfile
            .write_all(
                br#"
                long_term_storage:
                  type: s3compatible
                  endpoint: http://localhost:8888
                  bucket: whatever
                "#,
            )
            .unwrap();

        figment::Jail::expect_with(|jail| {
            jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:9001");

            let config = Config::load(Some(tempfile.path())).unwrap();

            let Storage::S3Compatible {
                endpoint,
                bucket: _bucket,
            } = &dbg!(&config).long_term_storage
            else {
                panic!("expected s3 storage");
            };
            // Env should overwrite the yaml config
            assert_eq!(endpoint, "http://localhost:9001");

            Ok(())
        });
    }

    #[test]
    fn configure_auth_with_env() {
        figment::Jail::expect_with(|jail| {
            jail.set_env("OS__AUTH__ENFORCE", "true");
            jail.set_env(
                "OS__AUTH__KEYS",
                r#"{kid1={key_files=["abcde","fghij","this is a test\n multiline string\nend of string\n"],max_permissions=["object.read", "object.write"],}, kid2={key_files=["12345"],}}"#,
            );

            let config = Config::load(None).unwrap();

            assert!(config.auth.enforce);

            let kid1 = config.auth.keys.get("kid1").unwrap();
            assert_eq!(kid1.key_files[0], Path::new("abcde"));
            assert_eq!(kid1.key_files[1], Path::new("fghij"));
            assert_eq!(
                kid1.key_files[2],
                Path::new("this is a test\n multiline string\nend of string\n"),
            );
            assert_eq!(
                kid1.max_permissions,
                HashSet::from([Permission::ObjectRead, Permission::ObjectWrite])
            );

            let kid2 = config.auth.keys.get("kid2").unwrap();
            assert_eq!(kid2.key_files[0], Path::new("12345"));
            assert_eq!(kid2.max_permissions, HashSet::new());

            Ok(())
        });
    }

    #[test]
    fn configure_auth_with_yaml() {
        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
        tempfile
            .write_all(
                br#"
                auth:
                  enforce: true
                  keys:
                    kid1:
                      key_files:
                        - "abcde"
                        - "fghij"
                        - |
                          this is a test
                           multiline string
                          end of string
                      max_permissions:
                        - "object.read"
                        - "object.write"
                    kid2:
                      key_files:
                        - "12345"
                "#,
            )
            .unwrap();

        figment::Jail::expect_with(|_jail| {
            let config = Config::load(Some(tempfile.path())).unwrap();

            assert!(config.auth.enforce);

            let kid1 = config.auth.keys.get("kid1").unwrap();
            assert_eq!(kid1.key_files[0], Path::new("abcde"));
            assert_eq!(kid1.key_files[1], Path::new("fghij"));
            assert_eq!(
                kid1.key_files[2],
                Path::new("this is a test\n multiline string\nend of string\n")
            );
            assert_eq!(
                kid1.max_permissions,
                HashSet::from([Permission::ObjectRead, Permission::ObjectWrite])
            );

            let kid2 = config.auth.keys.get("kid2").unwrap();
            assert_eq!(kid2.key_files[0], Path::new("12345"));
            assert_eq!(kid2.max_permissions, HashSet::new());

            Ok(())
        });
    }

    #[test]
    fn configure_killswitches_with_yaml() {
        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
        tempfile
            .write_all(
                br#"
                killswitches:
                  - usecase: broken_usecase
                  - scopes:
                      org: "42"
                  - scopes:
                      org: "42"
                      project: "4711"
                  - usecase: attachments
                    scopes:
                      org: "42"
                "#,
            )
            .unwrap();

        figment::Jail::expect_with(|_jail| {
            let expected = [
                Killswitch {
                    usecase: Some("broken_usecase".into()),
                    scopes: BTreeMap::new(),
                },
                Killswitch {
                    usecase: None,
                    scopes: BTreeMap::from([("org".into(), "42".into())]),
                },
                Killswitch {
                    usecase: None,
                    scopes: BTreeMap::from([
                        ("org".into(), "42".into()),
                        ("project".into(), "4711".into()),
                    ]),
                },
                Killswitch {
                    usecase: Some("attachments".into()),
                    scopes: BTreeMap::from([("org".into(), "42".into())]),
                },
            ];

            let config = Config::load(Some(tempfile.path())).unwrap();
            assert_eq!(&config.killswitches.0, &expected);

            Ok(())
        });
    }

    #[test]
    fn configure_rate_limits_with_yaml() {
        let mut tempfile = tempfile::NamedTempFile::new().unwrap();
        tempfile
            .write_all(
                br#"
                rate_limits:
                  throughput:
                    global_rps: 1000
                    burst: 100
                    usecase_pct: 50
                    scope_pct: 25
                    rules:
                      - usecase: "high_priority"
                        scopes:
                          - ["org", "123"]
                        rps: 500
                      - scopes:
                          - ["org", "456"]
                          - ["project", "789"]
                        pct: 10
                "#,
            )
            .unwrap();

        figment::Jail::expect_with(|_jail| {
            let expected = RateLimits {
                throughput: ThroughputLimits {
                    global_rps: Some(1000),
                    burst: 100,
                    usecase_pct: Some(50),
                    scope_pct: Some(25),
                    rules: vec![
                        ThroughputRule {
                            usecase: Some("high_priority".to_string()),
                            scopes: vec![("org".to_string(), "123".to_string())],
                            rps: Some(500),
                            pct: None,
                        },
                        ThroughputRule {
                            usecase: None,
                            scopes: vec![
                                ("org".to_string(), "456".to_string()),
                                ("project".to_string(), "789".to_string()),
                            ],
                            rps: None,
                            pct: Some(10),
                        },
                    ],
                },
                bandwidth: BandwidthLimits::default(),
            };

            let config = Config::load(Some(tempfile.path())).unwrap();
            assert_eq!(config.rate_limits, expected);

            Ok(())
        });
    }
}