objectstore_server/config.rs
1//! Configuration for the objectstore server.
2//!
3//! This module provides the configuration system for the objectstore HTTP server. Configuration can
4//! be loaded from multiple sources with the following precedence (highest to lowest):
5//!
6//! 1. Environment variables (prefixed with `OS__`)
7//! 2. YAML configuration file (specified via `-c` or `--config` flag)
8//! 3. Defaults
9//!
10//! See [`Config`] for a description of all configuration fields and their defaults.
11//!
12//! # Environment Variables
13//!
14//! Environment variables use `OS__` as a prefix and double underscores (`__`) to denote nested
15//! configuration structures. For example:
16//!
17//! - `OS__HTTP_ADDR=0.0.0.0:8888` sets the HTTP server address
18//! - `OS__LONG_TERM_STORAGE__TYPE=filesystem` sets the storage type
19//! - `OS__LONG_TERM_STORAGE__PATH=/data` sets the directory name
20//!
21//! # YAML Configuration File
22//!
23//! Configuration can also be provided via a YAML file. The above configuration in YAML format would
24//! look like this:
25//!
26//! ```yaml
27//! http_addr: 0.0.0.0:8888
28//!
//! long_term_storage:
//!   type: filesystem
//!   path: /data
32//! ```
33
34use std::borrow::Cow;
35use std::collections::BTreeMap;
36use std::fmt;
37use std::net::SocketAddr;
38use std::path::{Path, PathBuf};
39
40use anyhow::Result;
41use figment::providers::{Env, Format, Serialized, Yaml};
42use secrecy::{CloneableSecret, SecretBox, SerializableSecret, zeroize::Zeroize};
43use serde::{Deserialize, Serialize};
44use tracing::level_filters::LevelFilter;
45
/// Prefix shared by every environment variable that the configuration loader reads.
const ENV_PREFIX: &str = "OS__";
48
49/// Newtype around `String` that may protect against accidental
50/// logging of secrets in our configuration struct. Use with
51/// [`secrecy::SecretBox`].
52#[derive(Clone, Default, Serialize, Deserialize, PartialEq)]
53pub struct ConfigSecret(String);
54
55impl ConfigSecret {
56 pub fn as_str(&self) -> &str {
57 self.0.as_str()
58 }
59}
60
61impl std::ops::Deref for ConfigSecret {
62 type Target = str;
63 fn deref(&self) -> &Self::Target {
64 &self.0
65 }
66}
67
68impl fmt::Debug for ConfigSecret {
69 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
70 write!(f, "[redacted]")
71 }
72}
73
74impl CloneableSecret for ConfigSecret {}
75impl SerializableSecret for ConfigSecret {}
76impl Zeroize for ConfigSecret {
77 fn zeroize(&mut self) {
78 self.0.zeroize();
79 }
80}
81
82/// Storage backend configuration.
83///
84/// The `type` field in YAML or `__TYPE` in environment variables determines which variant is used.
85///
86/// Used in: [`Config::high_volume_storage`], [`Config::long_term_storage`]
87#[derive(Debug, Clone, Deserialize, Serialize)]
88#[serde(tag = "type", rename_all = "lowercase")]
89pub enum Storage {
90 /// Local filesystem storage backend (type `"filesystem"`).
91 ///
92 /// Stores objects as files on the local filesystem. Suitable for development, testing,
93 /// and single-server deployments.
94 ///
95 /// # Example
96 ///
97 /// ```yaml
98 /// long_term_storage:
99 /// type: filesystem
100 /// path: /data
101 /// ```
102 FileSystem {
103 /// Directory path for storing objects.
104 ///
105 /// The directory will be created if it doesn't exist. Relative paths are resolved from
106 /// the server's working directory.
107 ///
108 /// # Default
109 ///
110 /// `"data"` (relative to the server's working directory)
111 ///
112 /// # Environment Variables
113 ///
114 /// - `OS__HIGH_VOLUME_STORAGE__TYPE=filesystem`
115 /// - `OS__HIGH_VOLUME_STORAGE__PATH=/path/to/storage`
116 ///
117 /// Or for long-term storage:
118 /// - `OS__LONG_TERM_STORAGE__TYPE=filesystem`
119 /// - `OS__LONG_TERM_STORAGE__PATH=/path/to/storage`
120 path: PathBuf,
121 },
122
123 /// S3-compatible storage backend (type `"s3compatible"`).
124 ///
125 /// Supports [Amazon S3] and other S3-compatible services. Authentication is handled via
126 /// environment variables (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`) or IAM roles.
127 ///
128 /// [Amazon S3]: https://aws.amazon.com/s3/
129 ///
130 /// # Example
131 ///
132 /// ```yaml
133 /// long_term_storage:
134 /// type: s3compatible
135 /// endpoint: https://s3.amazonaws.com
136 /// bucket: my-bucket
137 /// ```
138 S3Compatible {
139 /// S3 endpoint URL.
140 ///
141 /// Examples: `https://s3.amazonaws.com`, `http://localhost:9000` (for MinIO)
142 ///
143 /// # Environment Variables
144 ///
145 /// - `OS__HIGH_VOLUME_STORAGE__TYPE=s3compatible`
146 /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=https://s3.amazonaws.com`
147 ///
148 /// Or for long-term storage:
149 /// - `OS__LONG_TERM_STORAGE__TYPE=s3compatible`
150 /// - `OS__LONG_TERM_STORAGE__ENDPOINT=https://s3.amazonaws.com`
151 endpoint: String,
152
153 /// S3 bucket name.
154 ///
155 /// The bucket must exist before starting the server.
156 ///
157 /// # Environment Variables
158 ///
159 /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-bucket`
160 /// - `OS__LONG_TERM_STORAGE__BUCKET=my-bucket`
161 bucket: String,
162 },
163
164 /// [Google Cloud Storage] backend (type `"gcs"`).
165 ///
166 /// Stores objects in Google Cloud Storage (GCS). Authentication uses Application Default
167 /// Credentials (ADC), which can be provided via the `GOOGLE_APPLICATION_CREDENTIALS`
168 /// environment variable or GCE/GKE metadata service.
169 ///
170 /// **Note**: The bucket must be pre-created with the following lifecycle policy:
171 /// - `daysSinceCustomTime`: 1 day
172 /// - `action`: delete
173 ///
174 /// [Google Cloud Storage]: https://cloud.google.com/storage
175 ///
176 /// # Example
177 ///
178 /// ```yaml
179 /// long_term_storage:
180 /// type: gcs
181 /// bucket: objectstore-bucket
182 /// ```
183 Gcs {
184 /// Optional custom GCS endpoint URL.
185 ///
186 /// Useful for testing with emulators. If `None`, uses the default GCS endpoint.
187 ///
188 /// # Default
189 ///
190 /// `None` (uses default GCS endpoint)
191 ///
192 /// # Environment Variables
193 ///
194 /// - `OS__HIGH_VOLUME_STORAGE__TYPE=gcs`
195 /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=http://localhost:9000` (optional)
196 ///
197 /// Or for long-term storage:
198 /// - `OS__LONG_TERM_STORAGE__TYPE=gcs`
199 /// - `OS__LONG_TERM_STORAGE__ENDPOINT=http://localhost:9000` (optional)
200 endpoint: Option<String>,
201
202 /// GCS bucket name.
203 ///
204 /// The bucket must exist before starting the server.
205 ///
206 /// # Environment Variables
207 ///
208 /// - `OS__HIGH_VOLUME_STORAGE__BUCKET=my-gcs-bucket`
209 /// - `OS__LONG_TERM_STORAGE__BUCKET=my-gcs-bucket`
210 bucket: String,
211 },
212
213 /// [Google Bigtable] backend (type `"bigtable"`).
214 ///
215 /// Stores objects in Google Cloud Bigtable, a NoSQL wide-column database. This backend is
216 /// optimized for high-throughput, low-latency workloads with small objects. Authentication uses
217 /// Application Default Credentials (ADC).
218 ///
219 /// **Note**: The table must be pre-created with appropriate column families. Ensure to have the
220 /// following column families:
221 /// - `fg`: timestamp-based garbage collection (`maxage=1s`)
222 /// - `fm`: manual garbage collection (`no GC policy`)
223 ///
224 /// [Google Bigtable]: https://cloud.google.com/bigtable
225 ///
226 /// # Example
227 ///
228 /// ```yaml
229 /// high_volume_storage:
230 /// type: bigtable
231 /// project_id: my-project
232 /// instance_name: objectstore
233 /// table_name: objectstore
234 /// ```
235 BigTable {
236 /// Optional custom Bigtable endpoint.
237 ///
238 /// Useful for testing with emulators. If `None`, uses the default Bigtable endpoint.
239 ///
240 /// # Default
241 ///
242 /// `None` (uses default Bigtable endpoint)
243 ///
244 /// # Environment Variables
245 ///
246 /// - `OS__HIGH_VOLUME_STORAGE__TYPE=bigtable`
247 /// - `OS__HIGH_VOLUME_STORAGE__ENDPOINT=localhost:8086` (optional)
248 ///
249 /// Or for long-term storage:
250 /// - `OS__LONG_TERM_STORAGE__TYPE=bigtable`
251 /// - `OS__LONG_TERM_STORAGE__ENDPOINT=localhost:8086` (optional)
252 endpoint: Option<String>,
253
254 /// GCP project ID.
255 ///
256 /// The Google project ID (not project number) containing the Bigtable instance.
257 ///
258 /// # Environment Variables
259 ///
260 /// - `OS__HIGH_VOLUME_STORAGE__PROJECT_ID=my-project`
261 /// - `OS__LONG_TERM_STORAGE__PROJECT_ID=my-project`
262 project_id: String,
263
264 /// Bigtable instance name.
265 ///
266 /// # Environment Variables
267 ///
268 /// - `OS__HIGH_VOLUME_STORAGE__INSTANCE_NAME=my-instance`
269 /// - `OS__LONG_TERM_STORAGE__INSTANCE_NAME=my-instance`
270 instance_name: String,
271
272 /// Bigtable table name.
273 ///
274 /// The table must exist before starting the server.
275 ///
276 /// # Environment Variables
277 ///
278 /// - `OS__HIGH_VOLUME_STORAGE__TABLE_NAME=objectstore`
279 /// - `OS__LONG_TERM_STORAGE__TABLE_NAME=objectstore`
280 table_name: String,
281
282 /// Optional number of connections to maintain to Bigtable.
283 ///
284 /// # Default
285 ///
286 /// `None` (infers connection count based on CPU count)
287 ///
288 /// # Environment Variables
289 ///
290 /// - `OS__HIGH_VOLUME_STORAGE__CONNECTIONS=16` (optional)
291 /// - `OS__LONG_TERM_STORAGE__CONNECTIONS=16` (optional)
292 connections: Option<usize>,
293 },
294}
295
296/// Runtime configuration for the Tokio async runtime.
297///
298/// Controls the threading behavior of the server's async runtime.
299///
300/// Used in: [`Config::runtime`]
301#[derive(Debug, Clone, Deserialize, Serialize)]
302#[serde(default)]
303pub struct Runtime {
304 /// Number of worker threads for the server runtime.
305 ///
306 /// This controls the size of the Tokio thread pool used to execute async tasks. More threads
307 /// can improve concurrency for CPU-bound workloads, but too many threads can increase context
308 /// switching overhead.
309 ///
310 /// Set this in accordance with the resources available to the server, especially in Kubernetes
311 /// environments.
312 ///
313 /// # Default
314 ///
315 /// Defaults to the number of CPU cores on the host machine.
316 ///
317 /// # Environment Variable
318 ///
319 /// `OS__RUNTIME__WORKER_THREADS`
320 ///
321 /// # Considerations
322 ///
323 /// - For I/O-bound workloads, the default (number of CPU cores) is usually sufficient
324 /// - For CPU-intensive workloads, consider matching or exceeding the number of cores
325 /// - Setting this too high can lead to increased memory usage and context switching
326 pub worker_threads: usize,
327}
328
329impl Default for Runtime {
330 fn default() -> Self {
331 Self {
332 worker_threads: num_cpus::get(),
333 }
334 }
335}
336
337/// [Sentry](https://sentry.io/) error tracking and performance monitoring configuration.
338///
339/// Configures integration with Sentry for error tracking, performance monitoring, and distributed
340/// tracing. Sentry is disabled by default and only enabled when a DSN is provided.
341///
342/// Used in: [`Config::sentry`]
343#[derive(Debug, Clone, Deserialize, Serialize)]
344pub struct Sentry {
345 /// Sentry DSN (Data Source Name).
346 ///
347 /// When set, enables Sentry error tracking and performance monitoring. When `None`, Sentry
348 /// integration is completely disabled.
349 ///
350 /// # Default
351 ///
352 /// `None` (Sentry disabled)
353 ///
354 /// # Environment Variable
355 ///
356 /// `OS__SENTRY__DSN`
357 pub dsn: Option<SecretBox<ConfigSecret>>,
358
359 /// Environment name for this deployment.
360 ///
361 /// Used to distinguish events from different environments (e.g., "production", "staging",
362 /// "development"). This appears in the Sentry UI and can be used for filtering.
363 ///
364 /// # Default
365 ///
366 /// `None`
367 ///
368 /// # Environment Variable
369 ///
370 /// `OS__SENTRY__ENVIRONMENT`
371 pub environment: Option<Cow<'static, str>>,
372
373 /// Server name or identifier.
374 ///
375 /// Used to identify which server instance sent an event. Useful in multi-server deployments for
376 /// tracking which instance encountered an error. Set to the hostname or pod name of the server.
377 ///
378 /// # Default
379 ///
380 /// `None`
381 ///
382 /// # Environment Variable
383 ///
384 /// `OS__SENTRY__SERVER_NAME`
385 pub server_name: Option<Cow<'static, str>>,
386
387 /// Error event sampling rate.
388 ///
389 /// Controls what percentage of error events are sent to Sentry. A value of `1.0` sends all
390 /// errors, while `0.5` sends 50% of errors, and `0.0` sends no errors.
391 ///
392 /// # Default
393 ///
394 /// `1.0` (send all errors)
395 ///
396 /// # Environment Variable
397 ///
398 /// `OS__SENTRY__SAMPLE_RATE`
399 pub sample_rate: f32,
400
401 /// Performance trace sampling rate.
402 ///
403 /// Controls what percentage of transactions (traces) are sent to Sentry for performance
404 /// monitoring. A value of `1.0` sends all traces, while `0.01` sends 1% of traces.
405 ///
406 /// **Important**: Performance traces can generate significant data volume in high-traffic
407 /// systems. Start with a low rate (0.01-0.1) and adjust based on traffic and Sentry quota.
408 ///
409 /// # Default
410 ///
411 /// `0.01` (send 1% of traces)
412 ///
413 /// # Environment Variable
414 ///
415 /// `OS__SENTRY__TRACES_SAMPLE_RATE`
416 pub traces_sample_rate: f32,
417
418 /// Whether to inherit sampling decisions from incoming traces.
419 ///
420 /// When `true` (default), if an incoming request contains a distributed tracing header with a
421 /// sampling decision (e.g., from an upstream service), that decision is honored. When `false`,
422 /// the local `traces_sample_rate` is always used instead.
423 ///
424 /// When this is enabled, the calling service effectively controls the sampling decision for the
425 /// entire trace. Set this to `false` if you want to have independent sampling control at the
426 /// objectstore level.
427 ///
428 /// # Default
429 ///
430 /// `true`
431 ///
432 /// # Environment Variable
433 ///
434 /// `OS__SENTRY__INHERIT_SAMPLING_DECISION`
435 pub inherit_sampling_decision: bool,
436
437 /// Enable Sentry SDK debug mode.
438 ///
439 /// When enabled, the Sentry SDK will output debug information to stderr, which can be useful
440 /// for troubleshooting Sentry integration issues. It is discouraged to enable this in
441 /// production as it generates verbose logging.
442 ///
443 /// # Default
444 ///
445 /// `false`
446 ///
447 /// # Environment Variable
448 ///
449 /// `OS__SENTRY__DEBUG`
450 pub debug: bool,
451}
452
453impl Sentry {
454 /// Returns whether Sentry integration is enabled.
455 ///
456 /// Sentry is considered enabled if a DSN is configured.
457 pub fn is_enabled(&self) -> bool {
458 self.dsn.is_some()
459 }
460}
461
462impl Default for Sentry {
463 fn default() -> Self {
464 Self {
465 dsn: None,
466 environment: None,
467 server_name: None,
468 sample_rate: 1.0,
469 traces_sample_rate: 0.01,
470 inherit_sampling_decision: true,
471 debug: false,
472 }
473 }
474}
475
476/// Log output format.
477///
478/// Controls how log messages are formatted. The format can be explicitly specified or
479/// auto-detected based on whether output is to a TTY.
480#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Deserialize, Serialize)]
481#[serde(rename_all = "lowercase")]
482pub enum LogFormat {
483 /// Auto detect the best format.
484 ///
485 /// This chooses [`LogFormat::Pretty`] for TTY, otherwise [`LogFormat::Simplified`].
486 Auto,
487
488 /// Pretty printing with colors.
489 ///
490 /// ```text
491 /// INFO objectstore::http > objectstore starting
492 /// ```
493 Pretty,
494
495 /// Simplified plain text output.
496 ///
497 /// ```text
498 /// 2020-12-04T12:10:32Z [objectstore::http] INFO: objectstore starting
499 /// ```
500 Simplified,
501
502 /// Dump out JSON lines.
503 ///
504 /// ```text
505 /// {"timestamp":"2020-12-04T12:11:08.729716Z","level":"INFO","logger":"objectstore::http","message":"objectstore starting","module_path":"objectstore::http","filename":"objectstore_service/src/http.rs","lineno":31}
506 /// ```
507 Json,
508}
509
/// Error returned when a string does not name a known logging format.
///
/// Wraps the input that was rejected.
#[derive(Clone, Debug)]
pub struct FormatParseError(String);

impl fmt::Display for FormatParseError {
    /// Prints the rejected input followed by the list of accepted format names.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(rejected) = self;
        write!(
            f,
            r#"error parsing "{}" as format: expected one of "auto", "pretty", "simplified", "json""#,
            rejected
        )
    }
}
523
524impl std::str::FromStr for LogFormat {
525 type Err = FormatParseError;
526
527 fn from_str(s: &str) -> Result<Self, Self::Err> {
528 let result = match s {
529 "" => LogFormat::Auto,
530 s if s.eq_ignore_ascii_case("auto") => LogFormat::Auto,
531 s if s.eq_ignore_ascii_case("pretty") => LogFormat::Pretty,
532 s if s.eq_ignore_ascii_case("simplified") => LogFormat::Simplified,
533 s if s.eq_ignore_ascii_case("json") => LogFormat::Json,
534 s => return Err(FormatParseError(s.into())),
535 };
536
537 Ok(result)
538 }
539}
540
541impl std::error::Error for FormatParseError {}
542
543mod display_fromstr {
544 pub fn serialize<T, S>(value: &T, serializer: S) -> Result<S::Ok, S::Error>
545 where
546 S: serde::Serializer,
547 T: std::fmt::Display,
548 {
549 serializer.collect_str(&value)
550 }
551
552 pub fn deserialize<'de, T, D>(deserializer: D) -> Result<T, D::Error>
553 where
554 D: serde::Deserializer<'de>,
555 T: std::str::FromStr,
556 <T as std::str::FromStr>::Err: std::fmt::Display,
557 {
558 use serde::Deserialize;
559 let s = <std::borrow::Cow<'de, str>>::deserialize(deserializer)?;
560 s.parse().map_err(serde::de::Error::custom)
561 }
562}
563
564/// Logging configuration.
565///
566/// Controls the verbosity and format of log output. Logs are always written to stderr.
567///
568/// Used in: [`Config::logging`]
569#[derive(Debug, Clone, Deserialize, Serialize)]
570pub struct Logging {
571 /// Minimum log level to output.
572 ///
573 /// Controls which log messages are emitted based on their severity. Messages at or above this
574 /// level will be output. Valid levels in increasing severity: TRACE, DEBUG, INFO, WARN, ERROR,
575 /// OFF.
576 ///
577 /// The `RUST_LOG` environment variable provides more granular control per module if needed.
578 ///
579 /// **Important**: Levels `DEBUG` and `TRACE` are very verbose and can impact performance; use
580 /// only for debugging.
581 ///
582 /// # Default
583 ///
584 /// `INFO`
585 ///
586 /// # Environment Variable
587 ///
588 /// `OS__LOGGING__LEVEL`
589 ///
590 /// # Considerations
591 ///
592 /// - `TRACE` and `DEBUG` can be very verbose and impact performance; use only for debugging
593 /// - `INFO` is appropriate for production
594 /// - `WARN` or `ERROR` can be used to reduce log volume in high-traffic systems
595 /// -
596 #[serde(with = "display_fromstr")]
597 pub level: LevelFilter,
598
599 /// Log output format.
600 ///
601 /// Determines how log messages are formatted. See [`LogFormat`] for available options and
602 /// examples.
603 ///
604 /// # Default
605 ///
606 /// `Auto` (pretty for TTY, simplified otherwise)
607 ///
608 /// # Environment Variable
609 ///
610 /// `OS__LOGGING__FORMAT`
611 pub format: LogFormat,
612}
613
614impl Default for Logging {
615 fn default() -> Self {
616 Self {
617 level: LevelFilter::INFO,
618 format: LogFormat::Auto,
619 }
620 }
621}
622
623/// Metrics configuration.
624///
625/// Configures submission of internal metrics to Datadog.
626#[derive(Clone, Debug, Default, Deserialize, Serialize)]
627pub struct Metrics {
628 /// Datadog [API key] for metrics.
629 ///
630 /// When provided, enables metrics reporting to Datadog. Metrics include request counts,
631 /// latencies, storage operations, and more. The key is kept secret and redacted from logs.
632 ///
633 /// # Default
634 ///
635 /// `None` (Datadog metrics disabled)
636 ///
637 /// # Environment Variable
638 ///
639 /// `OS__METRICS__DATADOG_KEY`
640 ///
641 /// [API key]: https://docs.datadoghq.com/account_management/api-app-keys/#api-keys
642 pub datadog_key: Option<SecretBox<ConfigSecret>>,
643
644 /// Global tags applied to all metrics.
645 ///
646 /// Key-value pairs that are attached to every metric sent to Datadog. Useful for
647 /// identifying the environment, region, or other deployment-specific information.
648 ///
649 /// # Default
650 ///
651 /// Empty (no tags)
652 ///
653 /// # Environment Variables
654 ///
655 /// Each tag is set individually:
656 /// - `OS__METRICS__TAGS__FOO=foo`
657 /// - `OS__METRICS__TAGS__BAR=bar`
658 ///
659 /// # YAML Example
660 ///
661 /// ```yaml
662 /// metrics:
663 /// tags:
664 /// foo: foo
665 /// bar: bar
666 /// ```
667 pub tags: BTreeMap<String, String>,
668}
669
670/// Main configuration struct for the objectstore server.
671///
672/// This is the top-level configuration that combines all server settings including networking,
673/// storage backends, runtime, and observability options.
674///
675/// Configuration is loaded with the following precedence (highest to lowest):
676/// 1. Environment variables (prefixed with `OS__`)
677/// 2. YAML configuration file (if provided via `-c` flag)
678/// 3. Default values
679///
680/// See individual field documentation for details on each configuration option, including
681/// defaults and environment variables.
682#[derive(Debug, Clone, Deserialize, Serialize)]
683pub struct Config {
684 /// HTTP server bind address.
685 ///
686 /// The socket address (IP and port) where the HTTP server will listen for incoming
687 /// connections. Supports both IPv4 and IPv6 addresses. Note that binding to `0.0.0.0`
688 /// makes the server accessible from all network interfaces.
689 ///
690 /// # Default
691 ///
692 /// `0.0.0.0:8888` (listens on all network interfaces, port 8888)
693 ///
694 /// # Environment Variable
695 ///
696 /// `OS__HTTP_ADDR`
697 pub http_addr: SocketAddr,
698
699 /// Storage backend for high-volume, small objects.
700 ///
701 /// This backend is used for smaller objects in scenarios where high-throughput, low-latency
702 /// access with many small objects is desired. Good candidates include Bigtable, local
703 /// filesystem (for development), or fast SSDs. Can be set to the same backend as
704 /// `long_term_storage` for simplicity.
705 ///
706 /// **Note**: Currently, objects up to 1 MiB are stored in this backend, while larger objects
707 /// are stored in the [`long_term_storage`](`Config::long_term_storage`). This is subject to
708 /// change in the future and more configuration options will be added to influence this
709 /// decision.
710 ///
711 /// # Default
712 ///
713 /// Filesystem storage in `./data/high-volume` directory
714 ///
715 /// # Environment Variables
716 ///
717 /// - `OS__HIGH_VOLUME_STORAGE__TYPE` for the backend type. See [`Storage`] for available
718 /// options.
719 ///
720 /// # Example
721 ///
722 /// ```yaml
723 /// high_volume_storage:
724 /// type: bigtable
725 /// project_id: my-project
726 /// instance_name: objectstore
727 /// table_name: objectstore
728 /// ```
729 pub high_volume_storage: Storage,
730
731 /// Storage backend for large objects with long-term retention.
732 ///
733 /// This backend is used for larger objects in scenarios with lower throughput and higher
734 /// latency requirements. Good candidates include S3, Google Cloud Storage, or other object
735 /// storage systems. Can be set to the same backend as `high_volume_storage` for simplicity.
736 ///
737 /// **Note**: Currently, objects over 1 MiB are stored in this backend, while smaller objects
738 /// are stored in the [`high_volume_storage`](`Config::high_volume_storage`). This is subject to
739 /// change in the future and more configuration options will be added to influence this
740 /// decision.
741 ///
742 /// # Default
743 ///
744 /// Filesystem storage in `./data/long-term` directory
745 ///
746 /// # Environment Variables
747 ///
748 /// - `OS__LONG_TERM_STORAGE__TYPE` - Backend type (filesystem, s3compatible, gcs, bigtable)
749 /// - Additional fields depending on the type (see [`Storage`])
750 ///
751 /// # Example
752 ///
753 /// ```yaml
754 /// long_term_storage:
755 /// type: gcs
756 /// bucket: my-objectstore-bucket
757 /// ```
758 pub long_term_storage: Storage,
759
760 /// Configuration of the internal task runtime.
761 ///
762 /// Controls the thread pool size and behavior of the async runtime powering the server.
763 /// See [`Runtime`] for configuration options.
764 pub runtime: Runtime,
765
766 /// Logging configuration.
767 ///
768 /// Controls log verbosity and output format. See [`Logging`] for configuration options.
769 pub logging: Logging,
770
771 /// Sentry error tracking configuration.
772 ///
773 /// Optional integration with Sentry for error tracking and performance monitoring.
774 /// See [`Sentry`] for configuration options.
775 pub sentry: Sentry,
776
777 /// Internal metrics configuration.
778 ///
779 /// Optional configuration for submitting internal metrics to Datadog. See [`Metrics`] for
780 /// configuration options.
781 pub metrics: Metrics,
782}
783
784impl Default for Config {
785 fn default() -> Self {
786 Self {
787 http_addr: "0.0.0.0:8888".parse().unwrap(),
788
789 high_volume_storage: Storage::FileSystem {
790 path: PathBuf::from("data/high-volume"),
791 },
792 long_term_storage: Storage::FileSystem {
793 path: PathBuf::from("data/long-term"),
794 },
795
796 runtime: Runtime::default(),
797 logging: Logging::default(),
798 sentry: Sentry::default(),
799 metrics: Metrics::default(),
800 }
801 }
802}
803
804impl Config {
805 /// Loads configuration from the provided arguments.
806 ///
807 /// Configuration is merged in the following order (later sources override earlier ones):
808 /// 1. Default values
809 /// 2. YAML configuration file (if provided in `args`)
810 /// 3. Environment variables (prefixed with `OS__`)
811 ///
812 /// # Errors
813 ///
814 /// Returns an error if:
815 /// - The YAML configuration file cannot be read or parsed
816 /// - Environment variables contain invalid values
817 /// - Required fields are missing or invalid
818 pub fn load(path: Option<&Path>) -> Result<Self> {
819 let mut figment = figment::Figment::from(Serialized::defaults(Config::default()));
820 if let Some(path) = path {
821 figment = figment.merge(Yaml::file(path));
822 }
823 let config = figment
824 .merge(Env::prefixed(ENV_PREFIX).split("__"))
825 .extract()?;
826
827 Ok(config)
828 }
829}
830
#[cfg(test)]
mod tests {
    use std::io::Write;

    use secrecy::ExposeSecret;

    use super::*;

    /// Writes `contents` to a fresh temporary file and returns its handle.
    ///
    /// The file lives as long as the returned handle.
    fn yaml_file(contents: &[u8]) -> tempfile::NamedTempFile {
        let mut file = tempfile::NamedTempFile::new().unwrap();
        file.write_all(contents).unwrap();
        file
    }

    /// Returns `(endpoint, bucket)` of an S3-compatible backend.
    ///
    /// Panics with the offending value for any other backend, so a failing test shows what was
    /// actually loaded.
    fn expect_s3(storage: &Storage) -> (&str, &str) {
        match storage {
            Storage::S3Compatible { endpoint, bucket } => (endpoint.as_str(), bucket.as_str()),
            other => panic!("expected s3 storage, got {other:?}"),
        }
    }

    /// Asserts the Sentry values that the env and YAML tests both configure.
    fn assert_sentry_overrides(sentry: &Sentry) {
        let dsn = sentry.dsn.as_ref().map(|dsn| dsn.expose_secret().as_str());
        assert_eq!(dsn, Some("abcde"));
        assert_eq!(sentry.environment.as_deref(), Some("production"));
        assert_eq!(
            sentry.server_name.as_deref(),
            Some("objectstore-deadbeef")
        );
        assert_eq!(sentry.sample_rate, 0.5);
        assert_eq!(sentry.traces_sample_rate, 0.5);
    }

    #[test]
    fn configurable_via_env() {
        figment::Jail::expect_with(|jail| {
            jail.set_env("OS__LONG_TERM_STORAGE__TYPE", "s3compatible");
            jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:8888");
            jail.set_env("OS__LONG_TERM_STORAGE__BUCKET", "whatever");
            jail.set_env("OS__METRICS__TAGS__FOO", "bar");
            jail.set_env("OS__METRICS__TAGS__BAZ", "qux");
            jail.set_env("OS__SENTRY__DSN", "abcde");
            jail.set_env("OS__SENTRY__SAMPLE_RATE", "0.5");
            jail.set_env("OS__SENTRY__ENVIRONMENT", "production");
            jail.set_env("OS__SENTRY__SERVER_NAME", "objectstore-deadbeef");
            jail.set_env("OS__SENTRY__TRACES_SAMPLE_RATE", "0.5");

            let config = Config::load(None).unwrap();

            let (endpoint, bucket) = expect_s3(&config.long_term_storage);
            assert_eq!(endpoint, "http://localhost:8888");
            assert_eq!(bucket, "whatever");
            assert_eq!(
                config.metrics.tags,
                [("foo".into(), "bar".into()), ("baz".into(), "qux".into())].into()
            );

            assert_sentry_overrides(&config.sentry);

            Ok(())
        });
    }

    #[test]
    fn configurable_via_yaml() {
        let tempfile = yaml_file(
            br#"
            long_term_storage:
                type: s3compatible
                endpoint: http://localhost:8888
                bucket: whatever
            sentry:
                dsn: abcde
                environment: production
                server_name: objectstore-deadbeef
                sample_rate: 0.5
                traces_sample_rate: 0.5
            "#,
        );

        figment::Jail::expect_with(|_jail| {
            let config = Config::load(Some(tempfile.path())).unwrap();

            let (endpoint, bucket) = expect_s3(&config.long_term_storage);
            assert_eq!(endpoint, "http://localhost:8888");
            assert_eq!(bucket, "whatever");

            assert_sentry_overrides(&config.sentry);

            Ok(())
        });
    }

    #[test]
    fn configured_with_env_and_yaml() {
        let tempfile = yaml_file(
            br#"
            long_term_storage:
                type: s3compatible
                endpoint: http://localhost:8888
                bucket: whatever
            "#,
        );

        figment::Jail::expect_with(|jail| {
            jail.set_env("OS__LONG_TERM_STORAGE__ENDPOINT", "http://localhost:9001");

            let config = Config::load(Some(tempfile.path())).unwrap();

            let (endpoint, _bucket) = expect_s3(&config.long_term_storage);
            // Env should overwrite the yaml config
            assert_eq!(endpoint, "http://localhost:9001");

            Ok(())
        });
    }
}