1use std::sync::LazyLock;
2
3use regex::Regex;
4use smallvec::{SmallVec, smallvec};
5
6use crate::config::RuleType;
7
/// Tag attached to every regex returned by `get_regex_for_rule_type`.
///
/// NOTE(review): the exact matching semantics live in the code that consumes this tag;
/// the variant notes below only describe how this file assigns them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PatternType {
    /// Assigned to patterns written against keys (the `RedactPair` key pattern and the
    /// password key regex).
    KeyValue,
    /// Assigned to all other patterns in this file.
    Value,
}
15
16#[derive(Clone, Debug, Eq, PartialEq)]
18pub enum ReplaceBehavior {
19 Value,
21
22 Groups(SmallVec<[u8; 1]>),
24}
25
26impl ReplaceBehavior {
27 pub fn replace_value() -> Self {
29 ReplaceBehavior::Value
30 }
31
32 pub fn replace_match() -> Self {
34 ReplaceBehavior::replace_group(0)
35 }
36
37 pub fn replace_group(g: u8) -> Self {
39 ReplaceBehavior::Groups(smallvec![g])
40 }
41
42 pub fn replace_groups(gs: SmallVec<[u8; 1]>) -> Self {
44 ReplaceBehavior::Groups(gs)
45 }
46}
47
48pub fn get_regex_for_rule_type(
50 ty: &RuleType,
51) -> SmallVec<[(PatternType, &Regex, ReplaceBehavior); 2]> {
52 let v = PatternType::Value;
53 let kv = PatternType::KeyValue;
54
55 match ty {
56 RuleType::RedactPair(redact_pair) => {
57 if let Ok(pattern) = redact_pair.key_pattern.compiled() {
58 smallvec![(kv, pattern, ReplaceBehavior::replace_value())]
59 } else {
60 smallvec![]
61 }
62 }
63 RuleType::Bearer => {
64 smallvec![(v, &*BEARER_TOKEN_REGEX, ReplaceBehavior::replace_match())]
65 }
66 RuleType::Password => {
67 smallvec![
68 (v, &*BEARER_TOKEN_REGEX, ReplaceBehavior::replace_match()),
71 (kv, &*PASSWORD_KEY_REGEX, ReplaceBehavior::replace_value()),
72 ]
73 }
74 RuleType::Anything => smallvec![(v, &*ANYTHING_REGEX, ReplaceBehavior::replace_match())],
75 RuleType::Pattern(r) => {
76 let replace_behavior = match r.replace_groups {
77 Some(ref groups) => {
78 ReplaceBehavior::replace_groups(groups.iter().copied().collect())
79 }
80 None => ReplaceBehavior::replace_match(),
81 };
82 if let Ok(pattern) = r.pattern.compiled() {
83 smallvec![(v, pattern, replace_behavior)]
84 } else {
85 smallvec![]
86 }
87 }
88
89 RuleType::Imei => smallvec![(v, &*IMEI_REGEX, ReplaceBehavior::replace_match())],
90 RuleType::Mac => smallvec![(v, &*MAC_REGEX, ReplaceBehavior::replace_match())],
91 RuleType::Uuid => smallvec![(v, &*UUID_REGEX, ReplaceBehavior::replace_match())],
92 RuleType::Email => smallvec![(v, &*EMAIL_REGEX, ReplaceBehavior::replace_match())],
93 RuleType::Iban => smallvec![(v, &*IBAN_REGEX, ReplaceBehavior::replace_match())],
94 RuleType::Ip => smallvec![
95 (v, &*IPV4_REGEX, ReplaceBehavior::replace_match()),
96 (v, &*IPV6_REGEX, ReplaceBehavior::replace_group(1)),
97 ],
98 RuleType::Creditcard => {
99 smallvec![(v, &*CREDITCARD_REGEX, ReplaceBehavior::replace_match())]
100 }
101 RuleType::Pemkey => smallvec![(v, &*PEM_KEY_REGEX, ReplaceBehavior::replace_group(1))],
102 RuleType::UrlAuth => smallvec![(v, &*URL_AUTH_REGEX, ReplaceBehavior::replace_group(1))],
103 RuleType::UsSsn => smallvec![(v, &*US_SSN_REGEX, ReplaceBehavior::replace_match())],
104 RuleType::Userpath => smallvec![(v, &*PATH_REGEX, ReplaceBehavior::replace_group(1))],
105
106 RuleType::Alias(_) | RuleType::Multiple(_) | RuleType::Unknown(_) => smallvec![],
108 }
109}
110
// Pattern fragments shared by the IPv4 and IPv6 regexes:
//   v6s - one hexadecimal IPv6 segment of 1 to 4 digits
//   v4s - one decimal IPv4 octet, 0 to 255 with leading zeros allowed, in a capture group
//   v4a - four `v4s` octets joined by literal dots
#[rustfmt::skip]
macro_rules! ip {
    (v6s) => { "[0-9a-fA-F]{1,4}" };
    (v4s) => { "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)" };
    (v4a) => { concat!(ip!(v4s), r"\.", ip!(v4s), r"\.", ip!(v4s), r"\.", ip!(v4s)) };
}
117
// Declares a lazily compiled `Regex` static named `$name` from the pattern `$rule`.
//
// The static is placed in a private module of the same name and re-imported right after,
// so every invocation gets its own `supports_byte_mode` test. That test checks that the
// pattern also builds as a non-Unicode `regex::bytes` regex.
macro_rules! regex {
    ($name:ident, $rule:expr) => {
        #[allow(non_snake_case)]
        mod $name {
            use super::*;

            // Compilation happens on first access; an invalid pattern panics there.
            pub static $name: LazyLock<Regex> = LazyLock::new(|| Regex::new($rule).unwrap());

            #[test]
            fn supports_byte_mode() {
                let built = regex::bytes::RegexBuilder::new($name.as_str())
                    .unicode(false)
                    .multi_line(false)
                    .dot_matches_new_line(true)
                    .build();
                assert!(built.is_ok());
            }
        }
        use $name::$name;
    };
}
140
141pub static ANYTHING_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(".*").unwrap());
142
143regex!(
144 IMEI_REGEX,
145 r"(?x)
146 \b
147 (\d{2}-?
148 \d{6}-?
149 \d{6}-?
150 \d{1,2})
151 \b
152 "
153);
154
155regex!(
156 MAC_REGEX,
157 r"(?x)
158 \b([[:xdigit:]]{2}[:-]){5}[[:xdigit:]]{2}\b
159 "
160);
161
162regex!(
163 UUID_REGEX,
164 r"(?ix)
165 \b
166 [a-z0-9]{8}-?
167 [a-z0-9]{4}-?
168 [a-z0-9]{4}-?
169 [a-z0-9]{4}-?
170 [a-z0-9]{12}
171 \b
172 "
173);
174
175regex!(
176 EMAIL_REGEX,
177 r"(?x)
178 \b
179 [a-zA-Z0-9.!\#$%&'*+/=?^_`{|}~-]+
180 @
181 [a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*
182 \b
183 "
184);
185
186regex!(
187 IBAN_REGEX,
188 r"(?x)
189 \b
190 (AT|AD|AE|AL|AZ|BA|BE|BG|BH|BR|BY|CH|CR|CY|CZ|DE|DK|DO|EE|EG|ES|FI|FO|FR|GB|GE|GI|GL|GR|GT|HR|HU|IE|IL|IQ|IS|IT|JO|KW|KZ|LB|LC|LI|LT|LU|LV|LY|MC|MD|ME|MK|MR|MT|MU|NL|NO|PK|PL|PS|PT|QA|RO|RU|RS|SA|SC|SE|SI|SK|SM|ST|SV|TL|TN|TR|UA|VA|VG|XK|DZ|AO|BJ|BF|BI|CV|CM|CF|TD|KM|CG|CI|DJ|GQ|GA|GW|HN|IR|MG|ML|MA|MZ|NI|NE|SN|TG)\d{2}[a-zA-Z0-9]{11,29}
191 \b
192 "
193);
194
195regex!(IPV4_REGEX, concat!("\\b", ip!(v4a), "\\b"));
196
197regex!(
198 IPV6_REGEX,
199 concat!(
200 "(?i)(?:[\\s]|[[:punct:]]|^)(",
201 "(",
202 ip!(v6s),
203 ":){7}",
204 ip!(v6s),
205 "|",
206 "(",
207 ip!(v6s),
208 ":){1,7}:|",
209 "(",
210 ip!(v6s),
211 ":){1,6}::",
212 ip!(v6s),
213 "|",
214 "(",
215 ip!(v6s),
216 ":){1,5}:(:",
217 ip!(v6s),
218 "){1,2}|",
219 "(",
220 ip!(v6s),
221 ":){1,4}:(:",
222 ip!(v6s),
223 "){1,3}|",
224 "(",
225 ip!(v6s),
226 ":){1,3}:(:",
227 ip!(v6s),
228 "){1,4}|",
229 "(",
230 ip!(v6s),
231 ":){1,2}:(:",
232 ip!(v6s),
233 "){1,5}|",
234 ip!(v6s),
235 ":((:",
236 ip!(v6s),
237 "){1,6})|",
238 ":((:",
239 ip!(v6s),
240 "){1,7}|:)|",
241 "fe80:(:",
242 ip!(v6s),
243 "){0,4}%[0-9a-zA-Z]{1,}",
244 "::(ffff(:0{1,4}){0,1}:){0,1}",
245 ip!(v4a),
246 "|",
247 "(",
248 ip!(v6s),
249 ":){1,4}:",
250 ip!(v4a),
251 ")([\\s]|[[:punct:]]|$)",
252 )
253);
254
255regex!(
261 CREDITCARD_REGEX,
262 r#"(?x)
263 \b(
264 (?: # vendor specific prefixes
265 3[47]\d # amex (no 13-digit version) (length: 15)
266 | 4\d{3} # visa (16-digit version only)
267 | 5[1-5]\d\d # mastercard
268 | 65\d\d # discover network (subset)
269 | 6011 # discover network (subset)
270 )
271
272 # "wildcard" remainder (allowing dashes in every position because of variable length)
273 ([-\s]?\d){12}
274 )\b
275 "#
276);
277
278regex!(
279 PATH_REGEX,
280 r"(?ix)
281 (?:
282 (?:
283 \b(?:[a-zA-Z]:[\\/])?
284 (?:users|home|documents and settings|[^/\\]+[/\\]profiles)[\\/]
285 ) | (?:
286 /(?:home|users)/
287 )
288 )
289 (
290 [^/\\\r\n]+
291 )
292 "
293);
294
295regex!(
296 PEM_KEY_REGEX,
297 r"(?sx)
298 (?:
299 -----
300 BEGIN[A-Z\ ]+(?:PRIVATE|PUBLIC)\ KEY
301 -----
302 [\t\ ]*\r?\n?
303 )
304 (.+?)
305 (?:
306 \r?\n?
307 -----
308 END[A-Z\ ]+(?:PRIVATE|PUBLIC)\ KEY
309 -----
310 )
311 "
312);
313
314regex!(
315 URL_AUTH_REGEX,
316 r"(?x)
317 \b(?:
318 (?:[a-z0-9+-]+:)?//
319 ([a-zA-Z0-9%_.-]+(?::[a-zA-Z0-9%_.-]+)?)
320 )@
321 "
322);
323
324regex!(
325 US_SSN_REGEX,
326 r"(?x)
327 \b(
328 [0-9]{3}-
329 [0-9]{2}-
330 [0-9]{4}
331 )\b
332 "
333);
334
335regex!(BEARER_TOKEN_REGEX, r"(?i)\b(Bearer\s+)([^\s]+)");
336
337regex!(
338 PASSWORD_KEY_REGEX,
339 r"(?i)(password|secret|passwd|api_key|apikey|auth|credentials|mysql_pwd|privatekey|private_key|token)"
340);
341
#[cfg(test)]
mod tests {
    use super::*;

    /// `PATH_REGEX` must still match a Windows user path when its pattern is rebuilt as a
    /// non-Unicode `regex::bytes` regex.
    #[test]
    fn test_userpath_utf8_bytes() {
        let mut builder = regex::bytes::RegexBuilder::new(PATH_REGEX.as_str());
        builder
            .unicode(false)
            .multi_line(false)
            .dot_matches_new_line(true);
        let byte_regex = builder.build().unwrap();

        let haystack: &[u8] = br"C:\\Users\jane\somefile";
        assert!(byte_regex.is_match(haystack));
    }
}