relay_pii/
config.rs

1use std::borrow::Cow;
2use std::collections::{BTreeMap, BTreeSet};
3use std::sync::OnceLock;
4
5use regex::{Regex, RegexBuilder};
6use serde::{Deserialize, Deserializer, Serialize, Serializer};
7
8use crate::{CompiledPiiConfig, Redaction, SelectorSpec};
9
10const COMPILED_PATTERN_MAX_SIZE: usize = 262_144;
11
/// Returns `true` when the given flag still holds the `bool` default, i.e. `false`.
///
/// Takes the flag by reference so it can be used as a serde `skip_serializing_if` predicate.
#[allow(clippy::trivially_copy_pass_by_ref)]
pub(crate) fn is_flag_default(flag: &bool) -> bool {
    *flag == bool::default()
}
17
18/// An error returned when parsing [`PiiConfig`].
19#[derive(Clone, Debug, thiserror::Error)]
20pub enum PiiConfigError {
21    /// A match pattern in a PII rule config could not be parsed.
22    #[error("could not parse pattern")]
23    RegexError(#[source] regex::Error),
24}
25
26/// Wrapper for the regex and the raw pattern string.
27///
28/// The regex will be compiled only when it used once, and the compiled version will be reused on
29/// consecutive calls.
30#[derive(Debug, Clone)]
31pub struct LazyPattern {
32    raw: Cow<'static, str>,
33    case_insensitive: bool,
34    pattern: OnceLock<Result<Regex, PiiConfigError>>,
35}
36
37impl PartialEq for LazyPattern {
38    fn eq(&self, other: &Self) -> bool {
39        self.raw.to_lowercase() == other.raw.to_lowercase()
40    }
41}
42
43impl LazyPattern {
44    /// Create a new [`LazyPattern`] from a raw string.
45    pub fn new<S>(raw: S) -> Self
46    where
47        Cow<'static, str>: From<S>,
48    {
49        Self {
50            raw: raw.into(),
51            case_insensitive: false,
52            pattern: OnceLock::new(),
53        }
54    }
55
56    /// Change the case sensativity settings for the underlying regex.
57    ///
58    /// It's possible to set the case sensativity on already compiled [`LazyPattern`], which will
59    /// be recompiled (re-built) once it's used again.
60    pub fn case_insensitive(mut self, value: bool) -> Self {
61        self.case_insensitive = value;
62        self.pattern.take();
63        self
64    }
65
66    /// Compiles the regex from the internal raw string.
67    pub fn compiled(&self) -> Result<&Regex, &PiiConfigError> {
68        self.pattern
69            .get_or_init(|| {
70                let regex_result = RegexBuilder::new(&self.raw)
71                    .size_limit(COMPILED_PATTERN_MAX_SIZE)
72                    .case_insensitive(self.case_insensitive)
73                    .build()
74                    .map_err(PiiConfigError::RegexError);
75
76                if let Err(ref error) = regex_result {
77                    relay_log::error!(
78                        error = error as &dyn std::error::Error,
79                        "unable to compile pattern into regex"
80                    );
81                }
82                regex_result
83            })
84            .as_ref()
85    }
86}
87
88impl From<&'static str> for LazyPattern {
89    fn from(pattern: &'static str) -> LazyPattern {
90        LazyPattern::new(pattern)
91    }
92}
93
94impl Serialize for LazyPattern {
95    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
96        serializer.serialize_str(&self.raw)
97    }
98}
99
100impl<'de> Deserialize<'de> for LazyPattern {
101    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
102        let raw = String::deserialize(deserializer)?;
103        Ok(LazyPattern::new(raw))
104    }
105}
106
/// Serde default for [`PatternRule::replace_groups`]: a set containing only group index `0`.
///
/// Wrapped in `Some` because the field it initializes is an `Option`.
#[allow(clippy::unnecessary_wraps)]
fn replace_groups_default() -> Option<BTreeSet<u8>> {
    Some(BTreeSet::from([0]))
}
113
114/// A rule that matches a regex pattern.
115#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
116#[serde(rename_all = "camelCase")]
117pub struct PatternRule {
118    /// The regular expression to apply.
119    pub pattern: LazyPattern,
120    /// The match group indices to replace.
121    #[serde(default = "replace_groups_default")]
122    pub replace_groups: Option<BTreeSet<u8>>,
123}
124
125/// A rule that dispatches to multiple other rules.
126#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
127#[serde(rename_all = "camelCase")]
128pub struct MultipleRule {
129    /// A reference to other rules to apply
130    pub rules: Vec<String>,
131    /// When set to true, the outer rule is reported.
132    #[serde(default, skip_serializing_if = "is_flag_default")]
133    pub hide_inner: bool,
134}
135
136/// An alias for another rule.
137#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
138#[serde(rename_all = "camelCase")]
139pub struct AliasRule {
140    /// A reference to another rule to apply.
141    pub rule: String,
142    /// When set to true, the outer rule is reported.
143    #[serde(default, skip_serializing_if = "is_flag_default")]
144    pub hide_inner: bool,
145}
146
147/// A pair redaction rule.
148#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
149#[serde(rename_all = "camelCase")]
150pub struct RedactPairRule {
151    /// A pattern to match for keys.
152    pub key_pattern: LazyPattern,
153}
154
155/// Supported scrubbing rules.
156#[derive(Deserialize, Serialize, Debug, Clone, PartialEq)]
157#[serde(tag = "type", rename_all = "snake_case")]
158pub enum RuleType {
159    /// Matches any value.
160    Anything,
161    /// Applies a regular expression.
162    Pattern(PatternRule),
163    /// Matchse an IMEI or IMEISV
164    Imei,
165    /// Matches a mac address
166    Mac,
167    /// Matches a UUID
168    Uuid,
169    /// Matches an email
170    Email,
171    /// Matches any IP address
172    Ip,
173    /// Matches a creditcard number
174    Creditcard,
175    /// Matches an IBAN
176    Iban,
177    /// Sanitizes a path from user data
178    Userpath,
179    /// A PEM encoded key
180    Pemkey,
181    /// Auth info from URLs
182    UrlAuth,
183    /// US SSN.
184    UsSsn,
185    /// Keys that look like passwords
186    Password,
187    /// When a regex matches a key, a value is removed
188    #[serde(alias = "redactPair")]
189    RedactPair(RedactPairRule),
190    /// Applies multiple rules.
191    Multiple(MultipleRule),
192    /// Applies another rule.  Works like a single multiple.
193    Alias(AliasRule),
194    /// Unknown ruletype for forward compatibility
195    Unknown(String),
196}
197
198/// A single rule configuration.
199#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
200pub struct RuleSpec {
201    /// The matching rule to apply on fields.
202    #[serde(flatten)]
203    pub ty: RuleType,
204
205    /// The redaction to apply on matched fields.
206    #[serde(default)]
207    pub redaction: Redaction,
208}
209
210/// Configuration for rule parameters.
211#[derive(Serialize, Deserialize, Debug, Default, Clone, Eq, PartialEq)]
212#[serde(rename_all = "camelCase")]
213pub struct Vars {
214    /// The default secret key for hashing operations.
215    #[serde(default, skip_serializing_if = "Option::is_none")]
216    pub hash_key: Option<String>,
217}
218
219impl Vars {
220    fn is_empty(&self) -> bool {
221        self.hash_key.is_none()
222    }
223}
224
225/// A set of named rule configurations.
226#[derive(Serialize, Deserialize, Debug, Default, Clone)]
227pub struct PiiConfig {
228    /// A map of custom PII rules.
229    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
230    pub rules: BTreeMap<String, RuleSpec>,
231
232    /// Parameters for PII rules.
233    #[serde(default, skip_serializing_if = "Vars::is_empty")]
234    pub vars: Vars,
235
236    /// Mapping of selectors to rules.
237    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
238    pub applications: BTreeMap<SelectorSpec, Vec<String>>,
239
240    /// PII config derived from datascrubbing settings.
241    ///
242    /// Cached because the conversion process is expensive.
243    #[serde(skip)]
244    pub(super) compiled: OnceLock<CompiledPiiConfig>,
245}
246
247impl PartialEq for PiiConfig {
248    fn eq(&self, other: &PiiConfig) -> bool {
249        // This is written in this way such that people will not forget to update this PartialEq
250        // impl when they add more fields.
251        let PiiConfig {
252            rules,
253            vars,
254            applications,
255            compiled: _compiled,
256        } = &self;
257
258        rules == &other.rules && vars == &other.vars && applications == &other.applications
259    }
260}
261
262impl PiiConfig {
263    /// Get a representation of this `PiiConfig` that is more (CPU-)efficient for processing.
264    ///
265    /// This can be computationally expensive when called for the first time. The result is cached
266    /// internally and reused on the second call.
267    pub fn compiled(&self) -> &CompiledPiiConfig {
268        self.compiled.get_or_init(|| self.compiled_uncached())
269    }
270
271    /// Like [`compiled`](Self::compiled) but without internal caching.
272    #[inline]
273    pub fn compiled_uncached(&self) -> CompiledPiiConfig {
274        CompiledPiiConfig::new(self)
275    }
276}