relay_pii/
config.rs

1use std::borrow::Cow;
2use std::collections::{BTreeMap, BTreeSet};
3use std::sync::OnceLock;
4
5use regex::{Regex, RegexBuilder};
6use serde::{Deserialize, Deserializer, Serialize, Serializer};
7
8use crate::{CompiledPiiConfig, Redaction, SelectorSpec};
9
10const COMPILED_PATTERN_MAX_SIZE: usize = 262_144;
11
/// Serde helper that reports whether a boolean flag still holds its default value, `false`.
///
/// Intended for `skip_serializing_if`, which hands the field over by reference.
#[allow(clippy::trivially_copy_pass_by_ref)]
pub(crate) fn is_flag_default(flag: &bool) -> bool {
    matches!(*flag, false)
}
17
/// An error returned when parsing [`PiiConfig`].
///
/// The only failure currently modelled is a rule pattern that cannot be turned into a regex.
#[derive(Clone, Debug, thiserror::Error)]
pub enum PiiConfigError {
    /// A match pattern in a PII rule config could not be parsed.
    ///
    /// Wraps the underlying [`regex::Error`], which is exposed as the error source.
    #[error("could not parse pattern")]
    RegexError(#[source] regex::Error),
}
25
/// Wrapper for the regex and the raw pattern string.
///
/// The regex is compiled lazily the first time it is needed, and the compiled version is reused
/// on subsequent calls.
#[derive(Debug, Clone)]
pub struct LazyPattern {
    /// The pattern source exactly as it was provided.
    raw: Cow<'static, str>,
    /// Whether the regex is built in case-insensitive mode.
    case_insensitive: bool,
    /// Cache for the outcome of compiling `raw`; a failed compilation is cached as well.
    pattern: OnceLock<Result<Regex, PiiConfigError>>,
}
36
37impl PartialEq for LazyPattern {
38    fn eq(&self, other: &Self) -> bool {
39        self.raw.to_lowercase() == other.raw.to_lowercase()
40    }
41}
42
43impl LazyPattern {
44    /// Create a new [`LazyPattern`] from a raw string.
45    pub fn new<S>(raw: S) -> Self
46    where
47        Cow<'static, str>: From<S>,
48    {
49        Self {
50            raw: raw.into(),
51            case_insensitive: false,
52            pattern: OnceLock::new(),
53        }
54    }
55
56    /// Change the case sensativity settings for the underlying regex.
57    ///
58    /// It's possible to set the case sensativity on already compiled [`LazyPattern`], which will
59    /// be recompiled (re-built) once it's used again.
60    pub fn case_insensitive(mut self, value: bool) -> Self {
61        self.case_insensitive = value;
62        self.pattern.take();
63        self
64    }
65
66    /// Compiles the regex from the internal raw string.
67    pub fn compiled(&self) -> Result<&Regex, &PiiConfigError> {
68        self.pattern
69            .get_or_init(|| {
70                let regex_result = RegexBuilder::new(&self.raw)
71                    .size_limit(COMPILED_PATTERN_MAX_SIZE)
72                    .case_insensitive(self.case_insensitive)
73                    .build()
74                    .map_err(PiiConfigError::RegexError);
75
76                if let Err(ref error) = regex_result {
77                    relay_log::error!(
78                        error = error as &dyn std::error::Error,
79                        "unable to compile pattern into regex"
80                    );
81                }
82                regex_result
83            })
84            .as_ref()
85    }
86}
87
88impl From<&'static str> for LazyPattern {
89    fn from(pattern: &'static str) -> LazyPattern {
90        LazyPattern::new(pattern)
91    }
92}
93
94impl Serialize for LazyPattern {
95    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
96        serializer.serialize_str(&self.raw)
97    }
98}
99
100impl<'de> Deserialize<'de> for LazyPattern {
101    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
102        let raw = String::deserialize(deserializer)?;
103        Ok(LazyPattern::new(raw))
104    }
105}
106
/// Serde default for a missing replace-groups field: a set containing only group `0`.
///
/// Always returns `Some`; the `Option` wrapper is required by the field type.
#[allow(clippy::unnecessary_wraps)]
fn replace_groups_default() -> Option<BTreeSet<u8>> {
    Some(BTreeSet::from([0]))
}
113
/// A rule that matches a regex pattern.
///
/// Field names are (de)serialized in camelCase, e.g. `replaceGroups`.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct PatternRule {
    /// The regular expression to apply.
    pub pattern: LazyPattern,
    /// The match group indices to replace.
    ///
    /// Defaults to a set containing only `0` when the field is missing from the input.
    #[serde(default = "replace_groups_default")]
    pub replace_groups: Option<BTreeSet<u8>>,
}
124
/// A rule that dispatches to multiple other rules.
///
/// Field names are (de)serialized in camelCase, e.g. `hideInner`.
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct MultipleRule {
    /// References to the other rules to apply.
    pub rules: Vec<String>,
    /// When set to true, the outer rule is reported.
    ///
    /// Defaults to `false` and is left out of the serialized output while it is `false`.
    #[serde(default, skip_serializing_if = "is_flag_default")]
    pub hide_inner: bool,
}
135
/// An alias for another rule.
///
/// Field names are (de)serialized in camelCase, e.g. `hideInner`.
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct AliasRule {
    /// A reference to another rule to apply.
    pub rule: String,
    /// When set to true, the outer rule is reported.
    ///
    /// Defaults to `false` and is left out of the serialized output while it is `false`.
    #[serde(default, skip_serializing_if = "is_flag_default")]
    pub hide_inner: bool,
}
146
/// A pair redaction rule.
///
/// Field names are (de)serialized in camelCase, i.e. `keyPattern`.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct RedactPairRule {
    /// A pattern to match for keys.
    pub key_pattern: LazyPattern,
}
154
/// Supported scrubbing rules.
///
/// The variant is selected by the `type` field of the serialized rule, using the snake_case form
/// of the variant name (for example `url_auth`).
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum RuleType {
    /// Matches any value.
    Anything,
    /// Applies a regular expression.
    Pattern(PatternRule),
    /// Matches an IMEI or IMEISV.
    Imei,
    /// Matches a MAC address.
    Mac,
    /// Matches a UUID.
    Uuid,
    /// Matches an email.
    Email,
    /// Matches any IP address.
    Ip,
    /// Matches a credit card number.
    Creditcard,
    /// Matches an IBAN.
    Iban,
    /// Sanitizes a path from user data.
    Userpath,
    /// A PEM encoded key.
    Pemkey,
    /// Auth info from URLs.
    UrlAuth,
    /// US SSN.
    UsSsn,
    /// A Bearer token.
    Bearer,
    /// Keys that look like passwords.
    Password,
    /// When a regex matches a key, a value is removed.
    ///
    /// Also accepted under the camelCase type name `redactPair` when deserializing.
    #[serde(alias = "redactPair")]
    RedactPair(RedactPairRule),
    /// Applies multiple rules.
    Multiple(MultipleRule),
    /// Applies another rule. Works like a single multiple.
    Alias(AliasRule),
    /// Unknown rule type, kept for forward compatibility.
    Unknown(String),
}
199
/// A single rule configuration.
///
/// Combines what to match (the rule type) with how to redact what was matched.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct RuleSpec {
    /// The matching rule to apply on fields.
    ///
    /// Flattened: the rule type's own fields sit next to `redaction` in the serialized form
    /// rather than in a nested object.
    #[serde(flatten)]
    pub ty: RuleType,

    /// The redaction to apply on matched fields.
    ///
    /// Falls back to the default [`Redaction`] when missing from the input.
    #[serde(default)]
    pub redaction: Redaction,
}
211
/// Configuration for rule parameters.
///
/// Field names are (de)serialized in camelCase, i.e. `hashKey`.
#[derive(Serialize, Deserialize, Debug, Default, Clone, Eq, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct Vars {
    /// The default secret key for hashing operations.
    ///
    /// Optional; left out of the serialized output when it is `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub hash_key: Option<String>,
}
220
221impl Vars {
222    fn is_empty(&self) -> bool {
223        self.hash_key.is_none()
224    }
225}
226
/// A set of named rule configurations.
///
/// Every serialized field is optional on input and is left out of the output while it is empty.
#[derive(Serialize, Deserialize, Debug, Default, Clone)]
pub struct PiiConfig {
    /// A map of custom PII rules.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub rules: BTreeMap<String, RuleSpec>,

    /// Parameters for PII rules.
    #[serde(default, skip_serializing_if = "Vars::is_empty")]
    pub vars: Vars,

    /// Mapping of selectors to rules.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub applications: BTreeMap<SelectorSpec, Vec<String>>,

    /// PII config derived from datascrubbing settings.
    ///
    /// Cached because the conversion process is expensive. Never serialized or deserialized.
    #[serde(skip)]
    pub(super) compiled: OnceLock<CompiledPiiConfig>,
}
248
249impl PartialEq for PiiConfig {
250    fn eq(&self, other: &PiiConfig) -> bool {
251        // This is written in this way such that people will not forget to update this PartialEq
252        // impl when they add more fields.
253        let PiiConfig {
254            rules,
255            vars,
256            applications,
257            compiled: _compiled,
258        } = &self;
259
260        rules == &other.rules && vars == &other.vars && applications == &other.applications
261    }
262}
263
264impl PiiConfig {
265    /// Get a representation of this `PiiConfig` that is more (CPU-)efficient for processing.
266    ///
267    /// This can be computationally expensive when called for the first time. The result is cached
268    /// internally and reused on the second call.
269    pub fn compiled(&self) -> &CompiledPiiConfig {
270        self.compiled.get_or_init(|| self.compiled_uncached())
271    }
272
273    /// Like [`compiled`](Self::compiled) but without internal caching.
274    #[inline]
275    pub fn compiled_uncached(&self) -> CompiledPiiConfig {
276        CompiledPiiConfig::new(self)
277    }
278}