1use std::sync::LazyLock;
2
3use regex::Regex;
4use smallvec::{SmallVec, smallvec};
5
6use crate::config::RuleType;
7
/// Tags each regex returned by [`get_regex_for_rule_type`] with the part of a key/value item
/// it is meant for.
///
/// NOTE(review): the code that interprets these tags is outside this file, so the variant
/// descriptions below are inferred from the variant names and from how the built-in rules
/// pair them with regexes — confirm against the consumer.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub enum PatternType {
 /// Presumably: pattern-match on key and value. Paired with the redact-pair key pattern and
 /// with `PASSWORD_KEY_REGEX`.
 KeyValue,
 /// Presumably: pattern-match on the key only. Paired with `TOKEN_KEY_REGEX`.
 Key,
 /// Presumably: pattern-match on the value only. Paired with all value-detecting regexes
 /// (IP, email, credit card, ...).
 Value,
}
17
/// Describes which part of the data is replaced once a regex has matched.
///
/// NOTE(review): the replacement itself happens outside this file; the descriptions are based
/// on the variant names and the constructors on this type — confirm against the consumer.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ReplaceBehavior {
 /// Replace the value as a whole rather than individual capture groups (presumably the
 /// entire string value).
 Value,

 /// Replace only the listed regex capture groups. Group `0` stands for the entire match,
 /// which is what `replace_match` uses.
 Groups(SmallVec<[u8; 1]>),
}
27
28impl ReplaceBehavior {
29 pub fn replace_value() -> Self {
31 ReplaceBehavior::Value
32 }
33
34 pub fn replace_match() -> Self {
36 ReplaceBehavior::replace_group(0)
37 }
38
39 pub fn replace_group(g: u8) -> Self {
41 ReplaceBehavior::Groups(smallvec![g])
42 }
43
44 pub fn replace_groups(gs: SmallVec<[u8; 1]>) -> Self {
46 ReplaceBehavior::Groups(gs)
47 }
48}
49
50pub fn get_regex_for_rule_type(
52 ty: &RuleType,
53) -> SmallVec<[(PatternType, &Regex, ReplaceBehavior); 2]> {
54 let v = PatternType::Value;
55 let k = PatternType::Key;
56 let kv = PatternType::KeyValue;
57
58 match ty {
59 RuleType::RedactPair(redact_pair) => {
60 if let Ok(pattern) = redact_pair.key_pattern.compiled() {
61 smallvec![(kv, pattern, ReplaceBehavior::replace_value())]
62 } else {
63 smallvec![]
64 }
65 }
66 RuleType::Bearer => {
67 smallvec![(v, &*BEARER_TOKEN_REGEX, ReplaceBehavior::replace_match())]
68 }
69 RuleType::Password => {
70 smallvec![
71 (v, &*BEARER_TOKEN_REGEX, ReplaceBehavior::replace_match()),
74 (k, &*TOKEN_KEY_REGEX, ReplaceBehavior::replace_value()),
75 (kv, &*PASSWORD_KEY_REGEX, ReplaceBehavior::replace_value()),
76 ]
77 }
78 RuleType::Anything => smallvec![(v, &*ANYTHING_REGEX, ReplaceBehavior::replace_match())],
79 RuleType::Pattern(r) => {
80 let replace_behavior = match r.replace_groups {
81 Some(ref groups) => {
82 ReplaceBehavior::replace_groups(groups.iter().copied().collect())
83 }
84 None => ReplaceBehavior::replace_match(),
85 };
86 if let Ok(pattern) = r.pattern.compiled() {
87 smallvec![(v, pattern, replace_behavior)]
88 } else {
89 smallvec![]
90 }
91 }
92
93 RuleType::Imei => smallvec![(v, &*IMEI_REGEX, ReplaceBehavior::replace_match())],
94 RuleType::Mac => smallvec![(v, &*MAC_REGEX, ReplaceBehavior::replace_match())],
95 RuleType::Uuid => smallvec![(v, &*UUID_REGEX, ReplaceBehavior::replace_match())],
96 RuleType::Email => smallvec![(v, &*EMAIL_REGEX, ReplaceBehavior::replace_match())],
97 RuleType::Iban => smallvec![(v, &*IBAN_REGEX, ReplaceBehavior::replace_match())],
98 RuleType::Ip => smallvec![
99 (v, &*IPV4_REGEX, ReplaceBehavior::replace_match()),
100 (v, &*IPV6_REGEX, ReplaceBehavior::replace_group(1)),
101 ],
102 RuleType::Creditcard => {
103 smallvec![(v, &*CREDITCARD_REGEX, ReplaceBehavior::replace_match())]
104 }
105 RuleType::Pemkey => smallvec![(v, &*PEM_KEY_REGEX, ReplaceBehavior::replace_group(1))],
106 RuleType::UrlAuth => smallvec![(v, &*URL_AUTH_REGEX, ReplaceBehavior::replace_group(1))],
107 RuleType::UsSsn => smallvec![(v, &*US_SSN_REGEX, ReplaceBehavior::replace_match())],
108 RuleType::Userpath => smallvec![(v, &*PATH_REGEX, ReplaceBehavior::replace_group(1))],
109
110 RuleType::Alias(_) | RuleType::Multiple(_) | RuleType::Unknown(_) => smallvec![],
112 }
113}
114
// Compile-time building blocks (string literals) for the IP address regexes below.
//
// - `v4s`: one IPv4 segment: `250`-`255`, `200`-`249`, or one to three digits with an
//   optional leading `0`/`1`. Note that it is a *capturing* group.
// - `v4a`: a full IPv4 address, i.e. four `v4s` segments joined by literal dots.
// - `v6s`: one IPv6 segment, i.e. one to four hexadecimal digits.
#[rustfmt::skip]
macro_rules! ip {
 (v4s) => { "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)" };
 (v4a) => { concat!(ip!(v4s), "\\.", ip!(v4s), "\\.", ip!(v4s), "\\.", ip!(v4s)) };
 (v6s) => { "[0-9a-fA-F]{1,4}" };
}
121
/// Declares a lazily compiled regex static called `$name` from the pattern `$rule`.
///
/// The static is defined inside a private module of the same name and imported back into the
/// invoking scope. The module additionally contains a test asserting that the very same
/// pattern also builds as a `regex::bytes` regex with Unicode mode disabled.
macro_rules! regex {
    ($name:ident, $rule:expr) => {
        // The module is named after the static, hence not snake case.
        #[allow(non_snake_case)]
        mod $name {
            use super::*;

            /// Compiled on first access; panics there if the pattern is invalid.
            pub static $name: LazyLock<Regex> = LazyLock::new(|| Regex::new($rule).unwrap());

            #[test]
            fn supports_byte_mode() {
                let mut builder = regex::bytes::RegexBuilder::new($name.as_str());
                builder
                    .unicode(false)
                    .multi_line(false)
                    .dot_matches_new_line(true);
                assert!(builder.build().is_ok());
            }
        }
        use $name::$name;
    };
}
144
/// Regex used by the `Anything` rule: `.*`, i.e. any (possibly empty) run of characters.
///
/// No flags are set, so with the regex crate's defaults `.` does not match a line feed.
/// Unlike the other regexes in this file it is declared directly rather than through
/// `regex!`, so it is public and has no generated byte-mode test.
pub static ANYTHING_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(".*").unwrap());
146
// IMEI-like numbers: digit groups of 2, 6, 6 and 1-2 digits (15 or 16 digits in total), each
// group optionally followed by a hyphen, delimited by word boundaries.
regex!(
 IMEI_REGEX,
 r"(?x)
 \b
 (\d{2}-?
 \d{6}-?
 \d{6}-?
 \d{1,2})
 \b
 "
);
158
// MAC addresses: six pairs of hexadecimal digits separated by `:` or `-` (separators may be
// mixed), delimited by word boundaries.
regex!(
 MAC_REGEX,
 r"(?x)
 \b([[:xdigit:]]{2}[:-]){5}[[:xdigit:]]{2}\b
 "
);
165
// UUID-shaped strings: groups of 8, 4, 4, 4 and 12 characters with optional hyphens between
// them, matched case-insensitively. The character class is `[a-z0-9]`, so any alphanumeric
// character is accepted, not only hexadecimal digits.
regex!(
 UUID_REGEX,
 r"(?ix)
 \b
 [a-z0-9]{8}-?
 [a-z0-9]{4}-?
 [a-z0-9]{4}-?
 [a-z0-9]{4}-?
 [a-z0-9]{12}
 \b
 "
);
178
// Email addresses: a local part made of ASCII letters, digits and common special characters,
// an `@`, one or more dot-separated domain labels, and a final label of at least two ASCII
// letters.
//
// NOTE(review): because the final alphabetic label is mandatory, addresses without a dot in
// the domain (e.g. `user@localhost`) are not matched — confirm that this is intended for a
// scrubbing rule.
regex!(
 EMAIL_REGEX,
 r"(?x)
 \b
 [a-zA-Z0-9.!\#$%&'*+/=?^_`{|}~-]+
 @
 [a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.[a-zA-Z]{2,}
 \b
 "
);
189
// IBANs: one of the listed two-letter country codes (upper case only, the pattern is
// case-sensitive), two check digits, then 11 to 29 alphanumeric characters, delimited by word
// boundaries. No separators are allowed inside the number.
regex!(
 IBAN_REGEX,
 r"(?x)
 \b
 (AT|AD|AE|AL|AZ|BA|BE|BG|BH|BR|BY|CH|CR|CY|CZ|DE|DK|DO|EE|EG|ES|FI|FO|FR|GB|GE|GI|GL|GR|GT|HR|HU|IE|IL|IQ|IS|IT|JO|KW|KZ|LB|LC|LI|LT|LU|LV|LY|MC|MD|ME|MK|MR|MT|MU|NL|NO|PK|PL|PS|PT|QA|RO|RU|RS|SA|SC|SE|SI|SK|SM|ST|SV|TL|TN|TR|UA|VA|VG|XK|DZ|AO|BJ|BF|BI|CV|CM|CF|TD|KM|CG|CI|DJ|GQ|GA|GW|HN|IR|MG|ML|MA|MZ|NI|NE|SN|TG)\d{2}[a-zA-Z0-9]{11,29}
 \b
 "
);
198
// IPv4 addresses: the dotted quad from `ip!(v4a)` delimited by word boundaries.
regex!(IPV4_REGEX, concat!("\\b", ip!(v4a), "\\b"));
200
// IPv6 addresses, matched case-insensitively.
//
// The pattern consumes one delimiter on each side of the address (whitespace, punctuation, or
// the start/end of the input). The address itself is capture group 1, which is why the `Ip`
// rule replaces group 1 instead of the whole match. Below, `X` stands for `ip!(v6s)`.
regex!(
 IPV6_REGEX,
 concat!(
 // Leading delimiter (non-capturing), then group 1 opens.
 "(?i)(?:[\\s]|[[:punct:]]|^)(",
 // `(X:){7}X`: eight full segments.
 "(",
 ip!(v6s),
 ":){7}",
 ip!(v6s),
 "|",
 // `(X:){1,7}:`: one to seven segments followed by a trailing `::`.
 "(",
 ip!(v6s),
 ":){1,7}:|",
 // `(X:){1,6}::X` and the `(X:){1,n}:(:X){1,m}` forms that follow.
 // NOTE(review): as written, each of these requires a further `:` after the `X:` groups,
 // i.e. three consecutive colons — confirm this is intended.
 "(",
 ip!(v6s),
 ":){1,6}::",
 ip!(v6s),
 "|",
 "(",
 ip!(v6s),
 ":){1,5}:(:",
 ip!(v6s),
 "){1,2}|",
 "(",
 ip!(v6s),
 ":){1,4}:(:",
 ip!(v6s),
 "){1,3}|",
 "(",
 ip!(v6s),
 ":){1,3}:(:",
 ip!(v6s),
 "){1,4}|",
 "(",
 ip!(v6s),
 ":){1,2}:(:",
 ip!(v6s),
 "){1,5}|",
 // `X:((:X){1,6})`: one segment, `::`-compression, then one to six segments.
 ip!(v6s),
 ":((:",
 ip!(v6s),
 "){1,6})|",
 // `:((:X){1,7}|:)`: leading `::` followed by segments, or just `::`.
 ":((:",
 ip!(v6s),
 "){1,7}|:)|",
 // `fe80:` prefix with a `%zone` suffix.
 // NOTE(review): there is no `|` between this fragment and the `::(ffff...)` fragment
 // that follows, so both form a single alternative — confirm this is intended.
 "fe80:(:",
 ip!(v6s),
 "){0,4}%[0-9a-zA-Z]{1,}",
 "::(ffff(:0{1,4}){0,1}:){0,1}",
 ip!(v4a),
 "|",
 // `(X:){1,4}:` followed by an embedded IPv4 address.
 "(",
 ip!(v6s),
 ":){1,4}:",
 ip!(v4a),
 // Group 1 closes, then the trailing delimiter.
 ")([\\s]|[[:punct:]]|$)",
 )
);
258
// Credit card numbers: a vendor-specific prefix (three digits for amex, four digits for the
// others) followed by exactly twelve more digits, each of which may be preceded by a single
// dash or whitespace character. The whole number is delimited by word boundaries.
regex!(
 CREDITCARD_REGEX,
 r#"(?x)
 \b(
 (?: # vendor specific prefixes
 3[47]\d # amex (no 13-digit version) (length: 15)
 | 4\d{3} # visa (16-digit version only)
 | 5[1-5]\d\d # mastercard
 | 65\d\d # discover network (subset)
 | 6011 # discover network (subset)
 )

 # "wildcard" remainder (allowing dashes in every position because of variable length)
 ([-\s]?\d){12}
 )\b
 "#
);
281
282regex!(
283 PATH_REGEX,
284 r"(?ix)
285 (?:
286 (?:
287 \b(?:[a-zA-Z]:[\\/])?
288 (?:users|home|documents and settings|[^/\\]+[/\\]profiles)[\\/]
289 ) | (?:
290 /(?:home|users)/
291 )
292 )
293 (
294 [^/\\\r\n]+
295 )
296 "
297);
298
// PEM-encoded keys.
//
// Matches a `-----BEGIN ... PRIVATE KEY-----` or `-----BEGIN ... PUBLIC KEY-----` header
// (optionally followed by trailing blanks and a line break), the key body, and the matching
// `-----END ... KEY-----` footer. Capture group 1 is the body, matched lazily up to the first
// footer; only this group is replaced by the `Pemkey` rule, so header and footer stay intact.
//
// The `s` flag lets `.` match line breaks so the body may span multiple lines; because of the
// `x` flag, literal spaces are written as `\ `.
regex!(
 PEM_KEY_REGEX,
 r"(?sx)
 (?:
 -----
 BEGIN[A-Z\ ]+(?:PRIVATE|PUBLIC)\ KEY
 -----
 [\t\ ]*\r?\n?
 )
 (.+?)
 (?:
 \r?\n?
 -----
 END[A-Z\ ]+(?:PRIVATE|PUBLIC)\ KEY
 -----
 )
 "
);
317
// Credentials embedded in URLs: an optional lower-case scheme, `//`, and then `user` or
// `user:password` directly before an `@`. Capture group 1 holds the credentials and is the
// only part replaced by the `UrlAuth` rule.
regex!(
 URL_AUTH_REGEX,
 r"(?x)
 \b(?:
 (?:[a-z0-9+-]+:)?//
 ([a-zA-Z0-9%_.-]+(?::[a-zA-Z0-9%_.-]+)?)
 )@
 "
);
327
// US social security numbers in the hyphenated `NNN-NN-NNNN` form, delimited by word
// boundaries. Numbers written without hyphens are not matched.
regex!(
 US_SSN_REGEX,
 r"(?x)
 \b(
 [0-9]{3}-
 [0-9]{2}-
 [0-9]{4}
 )\b
 "
);
338
// Bearer tokens: the word `Bearer` (case-insensitive) followed by whitespace (group 1) and a
// run of non-whitespace characters (group 2). The rules using it replace the whole match.
regex!(BEARER_TOKEN_REGEX, r"(?i)\b(Bearer\s+)([^\s]+)");
340
// Matches the text `token` (case-insensitive) anywhere in the input; the pattern is not
// anchored. Used by the `Password` rule with `PatternType::Key`.
regex!(TOKEN_KEY_REGEX, r"(?i)(token)");
342
// Case-insensitive list of terms hinting at a secret, used by the `Password` rule with
// `PatternType::KeyValue`.
//
// Most alternatives match anywhere in the input. `otp` and `two-factor`/`two_factor` are
// anchored and must make up the entire input. `token` only matches when it is followed by
// optional non-whitespace characters and a `:` or `=`.
regex!(
 PASSWORD_KEY_REGEX,
 r"(?i)(password|secret|passwd|api[-_]key|apikey|auth|credentials|mysql_pwd|privatekey|private[-_]key|token[^\s]*[:=]|^otp$|^two[-_]factor$)"
);
347
#[cfg(test)]
mod tests {
    use super::*;

    /// Rebuilds `PATH_REGEX` as a byte regex with Unicode mode disabled (the same options as
    /// the generated `supports_byte_mode` tests) and checks that it still finds a Windows
    /// user directory in raw bytes.
    #[test]
    fn test_userpath_utf8_bytes() {
        let mut builder = regex::bytes::RegexBuilder::new(PATH_REGEX.as_str());
        builder
            .unicode(false)
            .multi_line(false)
            .dot_matches_new_line(true);
        let byte_regex = builder.build().unwrap();

        let haystack: &[u8] = br"C:\\Users\jane\somefile";
        assert!(byte_regex.is_match(haystack));
    }
}