objectstore_server/config.rs
1//! Configuration for the objectstore server.
2//!
3//! This module provides the configuration system for the objectstore HTTP server. Configuration can
4//! be loaded from multiple sources with the following precedence (highest to lowest):
5//!
6//! 1. Environment variables (prefixed with `OS__`)
7//! 2. YAML configuration file (specified via `-c` or `--config` flag)
8//! 3. Defaults
9//!
10//! See [`Config`] for a description of all configuration fields and their defaults.
11//!
12//! # Environment Variables
13//!
14//! Environment variables use `OS__` as a prefix and double underscores (`__`) to denote nested
15//! configuration structures. For example:
16//!
17//! - `OS__HTTP_ADDR=0.0.0.0:8888` sets the HTTP server address
18//! - `OS__LONG_TERM_STORAGE__TYPE=filesystem` sets the storage type
19//! - `OS__LONG_TERM_STORAGE__PATH=/data` sets the directory name
20//!
21//! # YAML Configuration File
22//!
23//! Configuration can also be provided via a YAML file. The above configuration in YAML format would
24//! look like this:
25//!
26//! ```yaml
27//! http_addr: 0.0.0.0:8888
28//!
//! long_term_storage:
//!   type: filesystem
//!   path: /data
32//! ```
33
34use std::borrow::Cow;
35use std::collections::{BTreeMap, HashSet};
36use std::fmt;
37use std::net::SocketAddr;
38use std::path::{Path, PathBuf};
39use std::time::Duration;
40
41use anyhow::Result;
42use figment::providers::{Env, Format, Serialized, Yaml};
43use objectstore_types::Permission;
44use secrecy::{CloneableSecret, SecretBox, SerializableSecret, zeroize::Zeroize};
45use serde::{Deserialize, Serialize};
46use tracing::level_filters::LevelFilter;
47
48use crate::killswitches::Killswitches;
49
/// Environment variable prefix for all configuration options.
///
/// Passed to the environment provider in [`Config::load`]; only variables whose names start with
/// this prefix are considered, and `__` after the prefix separates nested keys.
const ENV_PREFIX: &str = "OS__";
52
/// Newtype around `String` that may protect against accidental
/// logging of secrets in our configuration struct. Use with
/// [`secrecy::SecretBox`].
///
/// The `Debug` implementation of this type prints `[redacted]` instead of the wrapped value.
/// The inner string is private; read it through [`ConfigSecret::as_str`] or via `Deref<Target = str>`.
#[derive(Clone, Default, Serialize, Deserialize, PartialEq)]
pub struct ConfigSecret(String);
58
59impl ConfigSecret {
60 pub fn as_str(&self) -> &str {
61 self.0.as_str()
62 }
63}
64
65impl From<&str> for ConfigSecret {
66 fn from(str: &str) -> Self {
67 ConfigSecret(str.to_string())
68 }
69}
70
71impl std::ops::Deref for ConfigSecret {
72 type Target = str;
73 fn deref(&self) -> &Self::Target {
74 &self.0
75 }
76}
77
78impl fmt::Debug for ConfigSecret {
79 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
80 write!(f, "[redacted]")
81 }
82}
83
84impl CloneableSecret for ConfigSecret {}
85impl SerializableSecret for ConfigSecret {}
86impl Zeroize for ConfigSecret {
87 fn zeroize(&mut self) {
88 self.0.zeroize();
89 }
90}
91
/// Storage backend configuration.
///
/// The `type` field in YAML or `__TYPE` in environment variables determines which variant is used.
/// Variant names are matched in lowercase: `filesystem`, `s3compatible`, `gcs`, `bigtable`.
///
/// Used in: [`Config::high_volume_storage`], [`Config::long_term_storage`]
#[derive(Debug, Deserialize, Serialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Storage {
    /// Local filesystem storage backend (type `"filesystem"`).
    ///
    /// Stores objects as files on the local filesystem. Suitable for development, testing,
    /// and single-server deployments.
    ///
    /// # Example
    ///
    /// ```yaml
    /// long_term_storage:
    ///   type: filesystem
    ///   path: /data
    /// ```
    FileSystem {
        /// Directory path for storing objects.
        ///
        /// The directory will be created if it doesn't exist. Relative paths are resolved from
        /// the server's working directory.
        ///
        /// # Default
        ///
        /// `"data"` (relative to the server's working directory)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=filesystem`
        /// - `OS__HIGH_VOLUME_STORAGE__PATH=/path/to/storage`
        ///
        /// Or for long-term storage:
        /// - `OS__LONG_TERM_STORAGE__TYPE=filesystem`
        /// - `OS__LONG_TERM_STORAGE__PATH=/path/to/storage`
        path: PathBuf,
    },

    /// S3-compatible storage backend (type `"s3compatible"`).
    ///
    /// Supports [Amazon S3] and other S3-compatible services. Authentication is handled via
    /// environment variables (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`) or IAM roles.
    ///
    /// [Amazon S3]: https://aws.amazon.com/s3/
    ///
    /// # Example
    ///
    /// ```yaml
    /// long_term_storage:
    ///   type: s3compatible
    ///   endpoint: https://s3.amazonaws.com
    ///   bucket: my-bucket
    /// ```
    S3Compatible {
        /// S3 endpoint URL.
        ///
        /// Examples: `https://s3.amazonaws.com`, `http://localhost:9000` (for MinIO)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=s3compatible`
        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=https://s3.amazonaws.com`
        ///
        /// Or for long-term storage:
        /// - `OS__LONG_TERM_STORAGE__TYPE=s3compatible`
        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=https://s3.amazonaws.com`
        endpoint: String,

        /// S3 bucket name.
        ///
        /// The bucket must exist before starting the server.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-bucket`
        /// - `OS__LONG_TERM_STORAGE__BUCKET=my-bucket`
        bucket: String,
    },

    /// [Google Cloud Storage] backend (type `"gcs"`).
    ///
    /// Stores objects in Google Cloud Storage (GCS). Authentication uses Application Default
    /// Credentials (ADC), which can be provided via the `GOOGLE_APPLICATION_CREDENTIALS`
    /// environment variable or GCE/GKE metadata service.
    ///
    /// **Note**: The bucket must be pre-created with the following lifecycle policy:
    /// - `daysSinceCustomTime`: 1 day
    /// - `action`: delete
    ///
    /// [Google Cloud Storage]: https://cloud.google.com/storage
    ///
    /// # Example
    ///
    /// ```yaml
    /// long_term_storage:
    ///   type: gcs
    ///   bucket: objectstore-bucket
    /// ```
    Gcs {
        /// Optional custom GCS endpoint URL.
        ///
        /// Useful for testing with emulators. If `None`, uses the default GCS endpoint.
        ///
        /// # Default
        ///
        /// `None` (uses default GCS endpoint)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=gcs`
        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=http://localhost:9000` (optional)
        ///
        /// Or for long-term storage:
        /// - `OS__LONG_TERM_STORAGE__TYPE=gcs`
        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=http://localhost:9000` (optional)
        endpoint: Option<String>,

        /// GCS bucket name.
        ///
        /// The bucket must exist before starting the server.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-gcs-bucket`
        /// - `OS__LONG_TERM_STORAGE__BUCKET=my-gcs-bucket`
        bucket: String,
    },

    /// [Google Bigtable] backend (type `"bigtable"`).
    ///
    /// Stores objects in Google Cloud Bigtable, a NoSQL wide-column database. This backend is
    /// optimized for high-throughput, low-latency workloads with small objects. Authentication uses
    /// Application Default Credentials (ADC).
    ///
    /// **Note**: The table must be pre-created with appropriate column families. Ensure to have the
    /// following column families:
    /// - `fg`: timestamp-based garbage collection (`maxage=1s`)
    /// - `fm`: manual garbage collection (`no GC policy`)
    ///
    /// [Google Bigtable]: https://cloud.google.com/bigtable
    ///
    /// # Example
    ///
    /// ```yaml
    /// high_volume_storage:
    ///   type: bigtable
    ///   project_id: my-project
    ///   instance_name: objectstore
    ///   table_name: objectstore
    /// ```
    BigTable {
        /// Optional custom Bigtable endpoint.
        ///
        /// Useful for testing with emulators. If `None`, uses the default Bigtable endpoint.
        ///
        /// # Default
        ///
        /// `None` (uses default Bigtable endpoint)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TYPE=bigtable`
        /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=localhost:8086` (optional)
        ///
        /// Or for long-term storage:
        /// - `OS__LONG_TERM_STORAGE__TYPE=bigtable`
        /// - `OS__LONG_TERM_STORAGE__ENDPOINT=localhost:8086` (optional)
        endpoint: Option<String>,

        /// GCP project ID.
        ///
        /// The Google project ID (not project number) containing the Bigtable instance.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__PROJECT_ID=my-project`
        /// - `OS__LONG_TERM_STORAGE__PROJECT_ID=my-project`
        project_id: String,

        /// Bigtable instance name.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__INSTANCE_NAME=my-instance`
        /// - `OS__LONG_TERM_STORAGE__INSTANCE_NAME=my-instance`
        instance_name: String,

        /// Bigtable table name.
        ///
        /// The table must exist before starting the server.
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__TABLE_NAME=objectstore`
        /// - `OS__LONG_TERM_STORAGE__TABLE_NAME=objectstore`
        table_name: String,

        /// Optional number of connections to maintain to Bigtable.
        ///
        /// # Default
        ///
        /// `None` (infers connection count based on CPU count)
        ///
        /// # Environment Variables
        ///
        /// - `OS__HIGH_VOLUME_STORAGE__CONNECTIONS=16` (optional)
        /// - `OS__LONG_TERM_STORAGE__CONNECTIONS=16` (optional)
        connections: Option<usize>,
    },
}
305
/// Runtime configuration for the Tokio async runtime.
///
/// Controls the threading behavior of the server's async runtime. Because of `#[serde(default)]`,
/// any field omitted from the input falls back to the value from the `Default` implementation.
///
/// Used in: [`Config::runtime`]
#[derive(Debug, Deserialize, Serialize)]
#[serde(default)]
pub struct Runtime {
    /// Number of worker threads for the server runtime.
    ///
    /// This controls the size of the Tokio thread pool used to execute async tasks. More threads
    /// can improve concurrency for CPU-bound workloads, but too many threads can increase context
    /// switching overhead.
    ///
    /// Set this in accordance with the resources available to the server, especially in Kubernetes
    /// environments.
    ///
    /// # Default
    ///
    /// Defaults to the number of CPU cores on the host machine.
    ///
    /// # Environment Variable
    ///
    /// `OS__RUNTIME__WORKER_THREADS`
    ///
    /// # Considerations
    ///
    /// - For I/O-bound workloads, the default (number of CPU cores) is usually sufficient
    /// - For CPU-intensive workloads, consider matching or exceeding the number of cores
    /// - Setting this too high can lead to increased memory usage and context switching
    pub worker_threads: usize,

    /// Interval for reporting internal runtime metrics.
    ///
    /// The value is (de)serialized through `humantime_serde`, so it is presumably written as a
    /// human-readable duration string such as `10s` or `1m` rather than a bare number of seconds.
    ///
    /// # Default
    ///
    /// `10` seconds.
    ///
    /// # Environment Variable
    ///
    /// `OS__RUNTIME__METRICS_INTERVAL`
    #[serde(with = "humantime_serde")]
    pub metrics_interval: Duration,
}
344
345impl Default for Runtime {
346 fn default() -> Self {
347 Self {
348 worker_threads: num_cpus::get(),
349 metrics_interval: Duration::from_secs(10),
350 }
351 }
352}
353
/// [Sentry](https://sentry.io/) error tracking and performance monitoring configuration.
///
/// Configures integration with Sentry for error tracking, performance monitoring, and distributed
/// tracing. Sentry is disabled by default and only enabled when a DSN is provided.
///
/// Used in: [`Config::sentry`]
#[derive(Debug, Deserialize, Serialize)]
pub struct Sentry {
    /// Sentry DSN (Data Source Name).
    ///
    /// When set, enables Sentry error tracking and performance monitoring. When `None`, Sentry
    /// integration is completely disabled.
    ///
    /// # Default
    ///
    /// `None` (Sentry disabled)
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__DSN`
    pub dsn: Option<SecretBox<ConfigSecret>>,

    /// Environment name for this deployment.
    ///
    /// Used to distinguish events from different environments (e.g., "production", "staging",
    /// "development"). This appears in the Sentry UI and can be used for filtering.
    ///
    /// # Default
    ///
    /// `None`
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__ENVIRONMENT`
    pub environment: Option<Cow<'static, str>>,

    /// Server name or identifier.
    ///
    /// Used to identify which server instance sent an event. Useful in multi-server deployments for
    /// tracking which instance encountered an error. Set to the hostname or pod name of the server.
    ///
    /// # Default
    ///
    /// `None`
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__SERVER_NAME`
    pub server_name: Option<Cow<'static, str>>,

    /// Error event sampling rate.
    ///
    /// Controls what percentage of error events are sent to Sentry. A value of `1.0` sends all
    /// errors, while `0.5` sends 50% of errors, and `0.0` sends no errors.
    ///
    /// # Default
    ///
    /// `1.0` (send all errors)
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__SAMPLE_RATE`
    pub sample_rate: f32,

    /// Performance trace sampling rate.
    ///
    /// Controls what percentage of transactions (traces) are sent to Sentry for performance
    /// monitoring. A value of `1.0` sends all traces, while `0.01` sends 1% of traces.
    ///
    /// **Important**: Performance traces can generate significant data volume in high-traffic
    /// systems. Start with a low rate (0.01-0.1) and adjust based on traffic and Sentry quota.
    ///
    /// # Default
    ///
    /// `0.01` (send 1% of traces)
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__TRACES_SAMPLE_RATE`
    pub traces_sample_rate: f32,

    /// Whether to inherit sampling decisions from incoming traces.
    ///
    /// When `true` (default), if an incoming request contains a distributed tracing header with a
    /// sampling decision (e.g., from an upstream service), that decision is honored. When `false`,
    /// the local `traces_sample_rate` is always used instead.
    ///
    /// When this is enabled, the calling service effectively controls the sampling decision for the
    /// entire trace. Set this to `false` if you want to have independent sampling control at the
    /// objectstore level.
    ///
    /// # Default
    ///
    /// `true`
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__INHERIT_SAMPLING_DECISION`
    pub inherit_sampling_decision: bool,

    /// Enable Sentry SDK debug mode.
    ///
    /// When enabled, the Sentry SDK will output debug information to stderr, which can be useful
    /// for troubleshooting Sentry integration issues. It is discouraged to enable this in
    /// production as it generates verbose logging.
    ///
    /// # Default
    ///
    /// `false`
    ///
    /// # Environment Variable
    ///
    /// `OS__SENTRY__DEBUG`
    pub debug: bool,

    /// Additional tags to attach to all Sentry events.
    ///
    /// Key-value pairs that are sent as tags with every event reported to Sentry. Useful for adding
    /// context such as deployment identifiers or environment details.
    ///
    /// # Default
    ///
    /// Empty (no tags)
    ///
    /// # Environment Variables
    ///
    /// Each tag is set individually:
    /// - `OS__SENTRY__TAGS__FOO=foo`
    /// - `OS__SENTRY__TAGS__BAR=bar`
    ///
    /// # YAML Example
    ///
    /// ```yaml
    /// sentry:
    ///   tags:
    ///     foo: foo
    ///     bar: bar
    /// ```
    pub tags: BTreeMap<String, String>,
}
494
495impl Sentry {
496 /// Returns whether Sentry integration is enabled.
497 ///
498 /// Sentry is considered enabled if a DSN is configured.
499 pub fn is_enabled(&self) -> bool {
500 self.dsn.is_some()
501 }
502}
503
504impl Default for Sentry {
505 fn default() -> Self {
506 Self {
507 dsn: None,
508 environment: None,
509 server_name: None,
510 sample_rate: 1.0,
511 traces_sample_rate: 0.01,
512 inherit_sampling_decision: true,
513 debug: false,
514 tags: BTreeMap::new(),
515 }
516 }
517}
518
/// Log output format.
///
/// Controls how log messages are formatted. The format can be explicitly specified or
/// auto-detected based on whether output is to a TTY.
///
/// In serialized configuration the variants are spelled in lowercase: `auto`, `pretty`,
/// `simplified`, and `json`.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum LogFormat {
    /// Auto detect the best format.
    ///
    /// This chooses [`LogFormat::Pretty`] for TTY, otherwise [`LogFormat::Simplified`].
    Auto,

    /// Pretty printing with colors.
    ///
    /// ```text
    /// INFO objectstore::http > objectstore starting
    /// ```
    Pretty,

    /// Simplified plain text output.
    ///
    /// ```text
    /// 2020-12-04T12:10:32Z [objectstore::http] INFO: objectstore starting
    /// ```
    Simplified,

    /// Dump out JSON lines.
    ///
    /// ```text
    /// {"timestamp":"2020-12-04T12:11:08.729716Z","level":"INFO","logger":"objectstore::http","message":"objectstore starting","module_path":"objectstore::http","filename":"objectstore_service/src/http.rs","lineno":31}
    /// ```
    Json,
}
552
/// Error produced when a string does not name a known log format.
///
/// Carries the rejected input so it can be echoed back in the error message.
#[derive(Clone, Debug)]
pub struct FormatParseError(String);

impl fmt::Display for FormatParseError {
    /// Renders the rejected input together with the list of accepted format names.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(input) = self;
        f.write_fmt(format_args!(
            r#"error parsing "{}" as format: expected one of "auto", "pretty", "simplified", "json""#,
            input
        ))
    }
}
566
567impl std::str::FromStr for LogFormat {
568 type Err = FormatParseError;
569
570 fn from_str(s: &str) -> Result<Self, Self::Err> {
571 let result = match s {
572 "" => LogFormat::Auto,
573 s if s.eq_ignore_ascii_case("auto") => LogFormat::Auto,
574 s if s.eq_ignore_ascii_case("pretty") => LogFormat::Pretty,
575 s if s.eq_ignore_ascii_case("simplified") => LogFormat::Simplified,
576 s if s.eq_ignore_ascii_case("json") => LogFormat::Json,
577 s => return Err(FormatParseError(s.into())),
578 };
579
580 Ok(result)
581 }
582}
583
// Marks `FormatParseError` as a standard error type; all trait methods use their defaults.
impl std::error::Error for FormatParseError {}
585
586mod display_fromstr {
587 pub fn serialize<T, S>(value: &T, serializer: S) -> Result<S::Ok, S::Error>
588 where
589 S: serde::Serializer,
590 T: std::fmt::Display,
591 {
592 serializer.collect_str(&value)
593 }
594
595 pub fn deserialize<'de, T, D>(deserializer: D) -> Result<T, D::Error>
596 where
597 D: serde::Deserializer<'de>,
598 T: std::str::FromStr,
599 <T as std::str::FromStr>::Err: std::fmt::Display,
600 {
601 use serde::Deserialize;
602 let s = <std::borrow::Cow<'de, str>>::deserialize(deserializer)?;
603 s.parse().map_err(serde::de::Error::custom)
604 }
605}
606
/// Logging configuration.
///
/// Controls the verbosity and format of log output. Logs are always written to stderr.
///
/// Used in: [`Config::logging`]
#[derive(Debug, Deserialize, Serialize)]
pub struct Logging {
    /// Minimum log level to output.
    ///
    /// Controls which log messages are emitted based on their severity. Messages at or above this
    /// level will be output. Valid levels in increasing severity: TRACE, DEBUG, INFO, WARN, ERROR,
    /// OFF.
    ///
    /// The value is read and written as a string via the level's `FromStr`/`Display`
    /// implementations.
    ///
    /// The `RUST_LOG` environment variable provides more granular control per module if needed.
    ///
    /// # Default
    ///
    /// `INFO`
    ///
    /// # Environment Variable
    ///
    /// `OS__LOGGING__LEVEL`
    ///
    /// # Considerations
    ///
    /// - `TRACE` and `DEBUG` can be very verbose and impact performance; use only for debugging
    /// - `INFO` is appropriate for production
    /// - `WARN` or `ERROR` can be used to reduce log volume in high-traffic systems
    #[serde(with = "display_fromstr")]
    pub level: LevelFilter,

    /// Log output format.
    ///
    /// Determines how log messages are formatted. See [`LogFormat`] for available options and
    /// examples.
    ///
    /// # Default
    ///
    /// `Auto` (pretty for TTY, simplified otherwise)
    ///
    /// # Environment Variable
    ///
    /// `OS__LOGGING__FORMAT`
    pub format: LogFormat,
}
656
657impl Default for Logging {
658 fn default() -> Self {
659 Self {
660 level: LevelFilter::INFO,
661 format: LogFormat::Auto,
662 }
663 }
664}
665
/// Metrics configuration.
///
/// Configures submission of internal metrics to Datadog. The derived `Default` leaves the API key
/// unset and the tag map empty.
#[derive(Debug, Default, Deserialize, Serialize)]
pub struct Metrics {
    /// Datadog [API key] for metrics.
    ///
    /// When provided, enables metrics reporting to Datadog. Metrics include request counts,
    /// latencies, storage operations, and more. The key is kept secret and redacted from logs.
    ///
    /// # Default
    ///
    /// `None` (Datadog metrics disabled)
    ///
    /// # Environment Variable
    ///
    /// `OS__METRICS__DATADOG_KEY`
    ///
    /// [API key]: https://docs.datadoghq.com/account_management/api-app-keys/#api-keys
    pub datadog_key: Option<SecretBox<ConfigSecret>>,

    /// Global tags applied to all metrics.
    ///
    /// Key-value pairs that are attached to every metric sent to Datadog. Useful for
    /// identifying the environment, region, or other deployment-specific information.
    ///
    /// # Default
    ///
    /// Empty (no tags)
    ///
    /// # Environment Variables
    ///
    /// Each tag is set individually:
    /// - `OS__METRICS__TAGS__FOO=foo`
    /// - `OS__METRICS__TAGS__BAR=bar`
    ///
    /// # YAML Example
    ///
    /// ```yaml
    /// metrics:
    ///   tags:
    ///     foo: foo
    ///     bar: bar
    /// ```
    pub tags: BTreeMap<String, String>,
}
712
/// A key that may be used to verify a request's `Authorization` header and its
/// associated permissions. May contain multiple key versions to facilitate rotation.
///
/// Used as the value type of [`AuthZ::keys`].
#[derive(Debug, Deserialize, Serialize)]
pub struct AuthZVerificationKey {
    /// Files that contain versions of this key's key material which may be used to verify
    /// signatures.
    ///
    /// If a key is being rotated, the old and new versions of that key should both be
    /// configured so objectstore can verify signatures while the updated key is still
    /// rolling out. Otherwise, this should only contain the most recent version of a key.
    pub key_files: Vec<PathBuf>,

    /// The maximum set of permissions that this key's signer is authorized to grant.
    ///
    /// If a request's `Authorization` header grants full permission but it was signed by
    /// a key that is only allowed to grant read permission, then the request only has
    /// read permission.
    ///
    /// When omitted from the configuration, this defaults to the empty set.
    #[serde(default)]
    pub max_permissions: HashSet<Permission>,
}
733
/// Configuration for content-based authorization.
///
/// The derived `Default` disables enforcement and configures no keys.
#[derive(Debug, Default, Deserialize, Serialize)]
pub struct AuthZ {
    // NOTE(review): HTTP status 403 is canonically named "Forbidden" ("Unauthorized" is 401) —
    // confirm which status the server actually returns and align the wording below.
    /// Whether to enforce content-based authorization or not.
    ///
    /// If this is set to `false`, checks are still performed but failures will not result
    /// in `403 Unauthorized` responses.
    pub enforce: bool,

    /// Keys that may be used to verify a request's `Authorization` header.
    ///
    /// This field is a container that is keyed on a key's ID. When verifying a JWT
    /// from the `Authorization` header, the `kid` field should be read from the JWT
    /// header and used to index into this map to select the appropriate key.
    ///
    /// When omitted from the configuration, this defaults to an empty map.
    #[serde(default)]
    pub keys: BTreeMap<String, AuthZVerificationKey>,
}
751
/// Main configuration struct for the objectstore server.
///
/// This is the top-level configuration that combines all server settings including networking,
/// storage backends, runtime, and observability options.
///
/// Configuration is loaded with the following precedence (highest to lowest):
/// 1. Environment variables (prefixed with `OS__`)
/// 2. YAML configuration file (if provided via `-c` flag)
/// 3. Default values
///
/// See individual field documentation for details on each configuration option, including
/// defaults and environment variables.
#[derive(Debug, Deserialize, Serialize)]
pub struct Config {
    /// HTTP server bind address.
    ///
    /// The socket address (IP and port) where the HTTP server will listen for incoming
    /// connections. Supports both IPv4 and IPv6 addresses. Note that binding to `0.0.0.0`
    /// makes the server accessible from all network interfaces.
    ///
    /// # Default
    ///
    /// `0.0.0.0:8888` (listens on all network interfaces, port 8888)
    ///
    /// # Environment Variable
    ///
    /// `OS__HTTP_ADDR`
    pub http_addr: SocketAddr,

    /// Storage backend for high-volume, small objects.
    ///
    /// This backend is used for smaller objects in scenarios where high-throughput, low-latency
    /// access with many small objects is desired. Good candidates include Bigtable, local
    /// filesystem (for development), or fast SSDs. Can be set to the same backend as
    /// `long_term_storage` for simplicity.
    ///
    /// **Note**: Currently, objects up to 1 MiB are stored in this backend, while larger objects
    /// are stored in the [`long_term_storage`](`Config::long_term_storage`). This is subject to
    /// change in the future and more configuration options will be added to influence this
    /// decision.
    ///
    /// # Default
    ///
    /// Filesystem storage in `./data/high-volume` directory
    ///
    /// # Environment Variables
    ///
    /// - `OS__HIGH_VOLUME_STORAGE__TYPE` for the backend type. See [`Storage`] for available
    ///   options.
    ///
    /// # Example
    ///
    /// ```yaml
    /// high_volume_storage:
    ///   type: bigtable
    ///   project_id: my-project
    ///   instance_name: objectstore
    ///   table_name: objectstore
    /// ```
    pub high_volume_storage: Storage,

    /// Storage backend for large objects with long-term retention.
    ///
    /// This backend is used for larger objects in scenarios with lower throughput and higher
    /// latency requirements. Good candidates include S3, Google Cloud Storage, or other object
    /// storage systems. Can be set to the same backend as `high_volume_storage` for simplicity.
    ///
    /// **Note**: Currently, objects over 1 MiB are stored in this backend, while smaller objects
    /// are stored in the [`high_volume_storage`](`Config::high_volume_storage`). This is subject to
    /// change in the future and more configuration options will be added to influence this
    /// decision.
    ///
    /// # Default
    ///
    /// Filesystem storage in `./data/long-term` directory
    ///
    /// # Environment Variables
    ///
    /// - `OS__LONG_TERM_STORAGE__TYPE` - Backend type (filesystem, s3compatible, gcs, bigtable)
    /// - Additional fields depending on the type (see [`Storage`])
    ///
    /// # Example
    ///
    /// ```yaml
    /// long_term_storage:
    ///   type: gcs
    ///   bucket: my-objectstore-bucket
    /// ```
    pub long_term_storage: Storage,

    /// Configuration of the internal task runtime.
    ///
    /// Controls the thread pool size and behavior of the async runtime powering the server.
    /// See [`Runtime`] for configuration options.
    pub runtime: Runtime,

    /// Logging configuration.
    ///
    /// Controls log verbosity and output format. See [`Logging`] for configuration options.
    pub logging: Logging,

    /// Sentry error tracking configuration.
    ///
    /// Optional integration with Sentry for error tracking and performance monitoring.
    /// See [`Sentry`] for configuration options.
    pub sentry: Sentry,

    /// Internal metrics configuration.
    ///
    /// Optional configuration for submitting internal metrics to Datadog. See [`Metrics`] for
    /// configuration options.
    pub metrics: Metrics,

    /// Content-based authorization configuration.
    ///
    /// Controls the verification and enforcement of content-based access control based on the
    /// JWT in a request's `Authorization` header. See [`AuthZ`] for configuration options.
    pub auth: AuthZ,

    /// A list of matchers for requests to discard without processing.
    ///
    /// See [`Killswitches`] for the matcher format.
    pub killswitches: Killswitches,
}
874
875impl Default for Config {
876 fn default() -> Self {
877 Self {
878 http_addr: "0.0.0.0:8888".parse().unwrap(),
879
880 high_volume_storage: Storage::FileSystem {
881 path: PathBuf::from("data/high-volume"),
882 },
883 long_term_storage: Storage::FileSystem {
884 path: PathBuf::from("data/long-term"),
885 },
886
887 runtime: Runtime::default(),
888 logging: Logging::default(),
889 sentry: Sentry::default(),
890 metrics: Metrics::default(),
891 auth: AuthZ::default(),
892 killswitches: Killswitches::default(),
893 }
894 }
895}
896
897impl Config {
898 /// Loads configuration from the provided arguments.
899 ///
900 /// Configuration is merged in the following order (later sources override earlier ones):
901 /// 1. Default values
902 /// 2. YAML configuration file (if provided in `args`)
903 /// 3. Environment variables (prefixed with `OS__`)
904 ///
905 /// # Errors
906 ///
907 /// Returns an error if:
908 /// - The YAML configuration file cannot be read or parsed
909 /// - Environment variables contain invalid values
910 /// - Required fields are missing or invalid
911 pub fn load(path: Option<&Path>) -> Result<Self> {
912 let mut figment = figment::Figment::from(Serialized::defaults(Config::default()));
913 if let Some(path) = path {
914 figment = figment.merge(Yaml::file(path));
915 }
916 let config = figment
917 .merge(Env::prefixed(ENV_PREFIX).split("__"))
918 .extract()?;
919
920 Ok(config)
921 }
922}
923
924#[cfg(test)]
925mod tests {
926 use std::io::Write;
927
928 use secrecy::ExposeSecret;
929
930 use crate::killswitches::Killswitch;
931
932 use super::*;
933
934 #[test]
935 fn configurable_via_env() {
936 figment::Jail::expect_with(|jail| {
937 jail.set_env("OS__LONG_TERM_STORAGE__TYPE", "s3compatible");
938 jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:8888");
939 jail.set_env("OS__LONG_TERM_STORAGE__BUCKET", "whatever");
940 jail.set_env("OS__METRICS__TAGS__FOO", "bar");
941 jail.set_env("OS__METRICS__TAGS__BAZ", "qux");
942 jail.set_env("OS__SENTRY__DSN", "abcde");
943 jail.set_env("OS__SENTRY__SAMPLE_RATE", "0.5");
944 jail.set_env("OS__SENTRY__ENVIRONMENT", "production");
945 jail.set_env("OS__SENTRY__SERVER_NAME", "objectstore-deadbeef");
946 jail.set_env("OS__SENTRY__TRACES_SAMPLE_RATE", "0.5");
947 jail.set_env("OS__SENTRY__TRACES_SAMPLE_RATE", "0.5");
948
949 let config = Config::load(None).unwrap();
950
951 let Storage::S3Compatible { endpoint, bucket } = &dbg!(&config).long_term_storage
952 else {
953 panic!("expected s3 storage");
954 };
955 assert_eq!(endpoint, "http://localhost:8888");
956 assert_eq!(bucket, "whatever");
957 assert_eq!(
958 config.metrics.tags,
959 [("foo".into(), "bar".into()), ("baz".into(), "qux".into())].into()
960 );
961
962 assert_eq!(config.sentry.dsn.unwrap().expose_secret().as_str(), "abcde");
963 assert_eq!(config.sentry.environment.as_deref(), Some("production"));
964 assert_eq!(
965 config.sentry.server_name.as_deref(),
966 Some("objectstore-deadbeef")
967 );
968 assert_eq!(config.sentry.sample_rate, 0.5);
969 assert_eq!(config.sentry.traces_sample_rate, 0.5);
970
971 Ok(())
972 });
973 }
974
/// Loads the configuration from a temporary YAML file and checks that the
/// storage backend and the Sentry settings carry the values written there.
#[test]
fn configurable_via_yaml() {
    let yaml: &[u8] = br#"
        long_term_storage:
            type: s3compatible
            endpoint: http://localhost:8888
            bucket: whatever
        sentry:
            dsn: abcde
            environment: production
            server_name: objectstore-deadbeef
            sample_rate: 0.5
            traces_sample_rate: 0.5
        "#;

    let mut config_file = tempfile::NamedTempFile::new().unwrap();
    config_file.write_all(yaml).unwrap();

    figment::Jail::expect_with(|_jail| {
        let config = Config::load(Some(config_file.path())).unwrap();

        // Storage section: must deserialize into the S3-compatible variant.
        let (endpoint, bucket) = match &dbg!(&config).long_term_storage {
            Storage::S3Compatible { endpoint, bucket } => (endpoint, bucket),
            _ => panic!("expected s3 storage"),
        };
        assert_eq!(endpoint, "http://localhost:8888");
        assert_eq!(bucket, "whatever");

        // Sentry section: every field set in the file must be reflected.
        let sentry = &config.sentry;
        let dsn = sentry.dsn.as_ref().unwrap();
        assert_eq!(dsn.expose_secret().as_str(), "abcde");
        assert_eq!(sentry.environment.as_deref(), Some("production"));
        assert_eq!(sentry.server_name.as_deref(), Some("objectstore-deadbeef"));
        assert_eq!(sentry.sample_rate, 0.5);
        assert_eq!(sentry.traces_sample_rate, 0.5);

        Ok(())
    });
}
1017
/// Checks how a YAML file and environment variables combine: a value set in
/// both places must come from the environment, while a value set only in the
/// YAML file must still be present in the loaded configuration.
#[test]
fn configured_with_env_and_yaml() {
    let mut tempfile = tempfile::NamedTempFile::new().unwrap();
    tempfile
        .write_all(
            br#"
            long_term_storage:
                type: s3compatible
                endpoint: http://localhost:8888
                bucket: whatever
            "#,
        )
        .unwrap();

    figment::Jail::expect_with(|jail| {
        // Only the endpoint is overridden; type and bucket come from the file.
        jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:9001");

        let config = Config::load(Some(tempfile.path())).unwrap();

        let Storage::S3Compatible { endpoint, bucket } = &dbg!(&config).long_term_storage
        else {
            panic!("expected s3 storage");
        };
        // Env should overwrite the yaml config
        assert_eq!(endpoint, "http://localhost:9001");
        // Values that only the yaml file provides must survive the override,
        // i.e. the env layer is merged into the section instead of replacing it.
        assert_eq!(bucket, "whatever");

        Ok(())
    });
}
1050
/// Configures the auth section purely through environment variables and
/// checks the enforce flag plus the key files and permissions of both keys.
#[test]
fn configure_auth_with_env() {
    figment::Jail::expect_with(|jail| {
        jail.set_env("OS__AUTH__ENFORCE", "true");
        // The whole key map is passed as one structured value in a single variable.
        jail.set_env(
            "OS__AUTH__KEYS",
            r#"{kid1={key_files=["abcde","fghij","this is a test\n multiline string\nend of string\n"],max_permissions=["object.read", "object.write"],}, kid2={key_files=["12345"],}}"#,
        );

        let config = Config::load(None).unwrap();
        let auth = &config.auth;

        assert!(auth.enforce);

        // First key: three key files (the last one spanning several lines)
        // and both object permissions.
        let first_key = auth.keys.get("kid1").unwrap();
        let first_key_files = [
            "abcde",
            "fghij",
            "this is a test\n multiline string\nend of string\n",
        ];
        for (index, expected) in first_key_files.iter().enumerate() {
            assert_eq!(first_key.key_files[index], Path::new(expected));
        }
        assert_eq!(
            first_key.max_permissions,
            HashSet::from([Permission::ObjectRead, Permission::ObjectWrite])
        );

        // Second key: a single key file and no permissions configured.
        let second_key = auth.keys.get("kid2").unwrap();
        assert_eq!(second_key.key_files[0], Path::new("12345"));
        assert_eq!(second_key.max_permissions, HashSet::new());

        Ok(())
    });
}
1083
/// Configures the auth section through a temporary YAML file and checks the
/// enforce flag plus the key files and permissions of both keys.
#[test]
fn configure_auth_with_yaml() {
    // The third key file is a literal block scalar; its second line carries
    // one extra leading space that must show up in the parsed value.
    let yaml: &[u8] = br#"
        auth:
            enforce: true
            keys:
                kid1:
                    key_files:
                      - "abcde"
                      - "fghij"
                      - |
                        this is a test
                         multiline string
                        end of string
                    max_permissions:
                      - "object.read"
                      - "object.write"
                kid2:
                    key_files:
                      - "12345"
        "#;

    let mut config_file = tempfile::NamedTempFile::new().unwrap();
    config_file.write_all(yaml).unwrap();

    figment::Jail::expect_with(|_jail| {
        let config = Config::load(Some(config_file.path())).unwrap();
        let auth = &config.auth;

        assert!(auth.enforce);

        // First key: three key files and both object permissions.
        let first_key = auth.keys.get("kid1").unwrap();
        let first_key_files = [
            "abcde",
            "fghij",
            "this is a test\n multiline string\nend of string\n",
        ];
        for (index, expected) in first_key_files.iter().enumerate() {
            assert_eq!(first_key.key_files[index], Path::new(expected));
        }
        assert_eq!(
            first_key.max_permissions,
            HashSet::from([Permission::ObjectRead, Permission::ObjectWrite])
        );

        // Second key: a single key file and no permissions configured.
        let second_key = auth.keys.get("kid2").unwrap();
        assert_eq!(second_key.key_files[0], Path::new("12345"));
        assert_eq!(second_key.max_permissions, HashSet::new());

        Ok(())
    });
}
1135
/// Loads a list of killswitches from a temporary YAML file and checks that
/// each entry is parsed with the expected usecase and scopes, in file order.
#[test]
fn configure_killswitches_with_yaml() {
    // Four entries: usecase only, one scope, two scopes, usecase plus scope.
    let yaml: &[u8] = br#"
        killswitches:
          - usecase: broken_usecase
          - scopes:
              org: "42"
          - scopes:
              org: "42"
              project: "4711"
          - usecase: attachments
            scopes:
              org: "42"
        "#;

    let mut config_file = tempfile::NamedTempFile::new().unwrap();
    config_file.write_all(yaml).unwrap();

    figment::Jail::expect_with(|_jail| {
        let config = Config::load(Some(config_file.path())).unwrap();

        let usecase_only = Killswitch {
            usecase: Some("broken_usecase".into()),
            scopes: BTreeMap::new(),
        };
        let org_only = Killswitch {
            usecase: None,
            scopes: BTreeMap::from([("org".into(), "42".into())]),
        };
        let org_and_project = Killswitch {
            usecase: None,
            scopes: BTreeMap::from([
                ("org".into(), "42".into()),
                ("project".into(), "4711".into()),
            ]),
        };
        let usecase_and_org = Killswitch {
            usecase: Some("attachments".into()),
            scopes: BTreeMap::from([("org".into(), "42".into())]),
        };

        let expected = [usecase_only, org_only, org_and_project, usecase_and_org];
        assert_eq!(config.killswitches.0, expected);

        Ok(())
    });
}
1185}