1use std::sync::LazyLock;
2
3use regex::Regex;
4use smallvec::{SmallVec, smallvec};
5
6use crate::config::RuleType;
7
/// Tag attached to every regex returned by `get_regex_for_rule_type`.
///
/// NOTE(review): the code that interprets this tag is not part of this file;
/// the variant docs below describe how the tag is assigned here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PatternType {
    /// Assigned to the key-oriented rules (`RedactPair`, `Password`).
    /// Presumably means "match against key and value" — confirm with the caller.
    KeyValue,
    /// Assigned to all other rules that yield a regex.
    Value,
}
15
16#[derive(Clone, Debug, Eq, PartialEq)]
18pub enum ReplaceBehavior {
19 Value,
21
22 Groups(SmallVec<[u8; 1]>),
24}
25
26impl ReplaceBehavior {
27 pub fn replace_value() -> Self {
29 ReplaceBehavior::Value
30 }
31
32 pub fn replace_match() -> Self {
34 ReplaceBehavior::replace_group(0)
35 }
36
37 pub fn replace_group(g: u8) -> Self {
39 ReplaceBehavior::Groups(smallvec![g])
40 }
41
42 pub fn replace_groups(gs: SmallVec<[u8; 1]>) -> Self {
44 ReplaceBehavior::Groups(gs)
45 }
46}
47
48pub fn get_regex_for_rule_type(
50 ty: &RuleType,
51) -> SmallVec<[(PatternType, &Regex, ReplaceBehavior); 2]> {
52 let v = PatternType::Value;
53 let kv = PatternType::KeyValue;
54
55 match ty {
56 RuleType::RedactPair(redact_pair) => {
57 if let Ok(pattern) = redact_pair.key_pattern.compiled() {
58 smallvec![(kv, pattern, ReplaceBehavior::replace_value())]
59 } else {
60 smallvec![]
61 }
62 }
63 RuleType::Password => {
64 smallvec![(kv, &*PASSWORD_KEY_REGEX, ReplaceBehavior::replace_value())]
65 }
66 RuleType::Anything => smallvec![(v, &*ANYTHING_REGEX, ReplaceBehavior::replace_match())],
67 RuleType::Pattern(r) => {
68 let replace_behavior = match r.replace_groups {
69 Some(ref groups) => {
70 ReplaceBehavior::replace_groups(groups.iter().copied().collect())
71 }
72 None => ReplaceBehavior::replace_match(),
73 };
74 if let Ok(pattern) = r.pattern.compiled() {
75 smallvec![(v, pattern, replace_behavior)]
76 } else {
77 smallvec![]
78 }
79 }
80
81 RuleType::Imei => smallvec![(v, &*IMEI_REGEX, ReplaceBehavior::replace_match())],
82 RuleType::Mac => smallvec![(v, &*MAC_REGEX, ReplaceBehavior::replace_match())],
83 RuleType::Uuid => smallvec![(v, &*UUID_REGEX, ReplaceBehavior::replace_match())],
84 RuleType::Email => smallvec![(v, &*EMAIL_REGEX, ReplaceBehavior::replace_match())],
85 RuleType::Iban => smallvec![(v, &*IBAN_REGEX, ReplaceBehavior::replace_match())],
86 RuleType::Ip => smallvec![
87 (v, &*IPV4_REGEX, ReplaceBehavior::replace_match()),
88 (v, &*IPV6_REGEX, ReplaceBehavior::replace_group(1)),
89 ],
90 RuleType::Creditcard => {
91 smallvec![(v, &*CREDITCARD_REGEX, ReplaceBehavior::replace_match())]
92 }
93 RuleType::Pemkey => smallvec![(v, &*PEM_KEY_REGEX, ReplaceBehavior::replace_group(1))],
94 RuleType::UrlAuth => smallvec![(v, &*URL_AUTH_REGEX, ReplaceBehavior::replace_group(1))],
95 RuleType::UsSsn => smallvec![(v, &*US_SSN_REGEX, ReplaceBehavior::replace_match())],
96 RuleType::Userpath => smallvec![(v, &*PATH_REGEX, ReplaceBehavior::replace_group(1))],
97
98 RuleType::Alias(_) | RuleType::Multiple(_) | RuleType::Unknown(_) => smallvec![],
100 }
101}
102
// String fragments used to assemble the IP-address regexes via `concat!`.
//
// * `v4s`: one decimal IPv4 octet in the range 0-255 (leading zeros are
//   accepted), wrapped in a capture group.
// * `v4a`: four `v4s` octets joined by literal dots.
// * `v6s`: one IPv6 segment made of one to four hex digits.
#[rustfmt::skip]
macro_rules! ip {
    (v4s) => {
        "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"
    };
    (v4a) => {
        concat!(
            ip!(v4s), "\\.",
            ip!(v4s), "\\.",
            ip!(v4s), "\\.",
            ip!(v4s)
        )
    };
    (v6s) => {
        "[0-9a-fA-F]{1,4}"
    };
}
109
// Declares a lazily compiled `Regex` static called `$name` from the pattern
// expression `$rule`.
//
// The static lives in a private module of the same name (hence the
// `non_snake_case` allowance) together with a test that checks the pattern
// also builds as a non-Unicode `regex::bytes` regex. The static is then
// imported back into the surrounding module.
macro_rules! regex {
    ($name:ident, $rule:expr) => {
        #[allow(non_snake_case)]
        mod $name {
            use super::*;

            pub static $name: LazyLock<Regex> = LazyLock::new(|| {
                let compiled = Regex::new($rule);
                compiled.unwrap()
            });

            #[test]
            fn supports_byte_mode() {
                let mut builder = regex::bytes::RegexBuilder::new($name.as_str());
                builder
                    .unicode(false)
                    .multi_line(false)
                    .dot_matches_new_line(true);
                assert!(builder.build().is_ok());
            }
        }
        use $name::$name;
    };
}
132
133pub static ANYTHING_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(".*").unwrap());
134
// IMEI-like numbers: digit groups of 2, 6, 6 and 1-2 digits, each group
// optionally followed by a dash, delimited by word boundaries. The number is
// also capture group 1; `get_regex_for_rule_type` replaces the whole match.
regex!(
    IMEI_REGEX,
    r"(?x)
        \b
            (\d{2}-?
             \d{6}-?
             \d{6}-?
             \d{1,2})
        \b
    "
);
146
// MAC addresses: six pairs of hex digits separated by `:` or `-`, delimited
// by word boundaries.
regex!(
    MAC_REGEX,
    r"(?x)
        \b([[:xdigit:]]{2}[:-]){5}[[:xdigit:]]{2}\b
    "
);
153
// UUID-shaped tokens: groups of 8, 4, 4, 4 and 12 characters with optional
// dashes between them, matched case-insensitively between word boundaries.
// Note that the character class is `[a-z0-9]`, i.e. any letter or digit, not
// only hex digits.
regex!(
    UUID_REGEX,
    r"(?ix)
        \b
        [a-z0-9]{8}-?
        [a-z0-9]{4}-?
        [a-z0-9]{4}-?
        [a-z0-9]{4}-?
        [a-z0-9]{12}
        \b
    "
);
166
// Email addresses: a local part of one or more characters from the listed
// set, an `@`, then one or more dot-separated labels made of letters, digits
// and dashes. `#` is escaped because it would otherwise start a comment in
// verbose (`x`) mode.
regex!(
    EMAIL_REGEX,
    r"(?x)
        \b
            [a-zA-Z0-9.!\#$%&'*+/=?^_`{|}~-]+
            @
            [a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*
        \b
    "
);
177
// IBAN-like tokens: a two-letter code from the fixed list below (capture
// group 1), two digits, then 11 to 29 letters or digits, delimited by word
// boundaries. The code list is case-sensitive (no `i` flag).
regex!(
    IBAN_REGEX,
    r"(?x)
        \b
        (AT|AD|AE|AL|AZ|BA|BE|BG|BH|BR|BY|CH|CR|CY|CZ|DE|DK|DO|EE|EG|ES|FI|FO|FR|GB|GE|GI|GL|GR|GT|HR|HU|IE|IL|IQ|IS|IT|JO|KW|KZ|LB|LC|LI|LT|LU|LV|LY|MC|MD|ME|MK|MR|MT|MU|NL|NO|PK|PL|PS|PT|QA|RO|RU|RS|SA|SC|SE|SI|SK|SM|ST|SV|TL|TN|TR|UA|VA|VG|XK|DZ|AO|BJ|BF|BI|CV|CM|CF|TD|KM|CG|CI|DJ|GQ|GA|GW|HN|IR|MG|ML|MA|MZ|NI|NE|SN|TG)\d{2}[a-zA-Z0-9]{11,29}
        \b
    "
);
186
// Dotted-quad IPv4 address (`ip!(v4a)`) delimited by word boundaries.
regex!(IPV4_REGEX, concat!("\\b", ip!(v4a), "\\b"));
188
189regex!(
190 IPV6_REGEX,
191 concat!(
192 "(?i)(?:[\\s]|[[:punct:]]|^)(",
193 "(",
194 ip!(v6s),
195 ":){7}",
196 ip!(v6s),
197 "|",
198 "(",
199 ip!(v6s),
200 ":){1,7}:|",
201 "(",
202 ip!(v6s),
203 ":){1,6}::",
204 ip!(v6s),
205 "|",
206 "(",
207 ip!(v6s),
208 ":){1,5}:(:",
209 ip!(v6s),
210 "){1,2}|",
211 "(",
212 ip!(v6s),
213 ":){1,4}:(:",
214 ip!(v6s),
215 "){1,3}|",
216 "(",
217 ip!(v6s),
218 ":){1,3}:(:",
219 ip!(v6s),
220 "){1,4}|",
221 "(",
222 ip!(v6s),
223 ":){1,2}:(:",
224 ip!(v6s),
225 "){1,5}|",
226 ip!(v6s),
227 ":((:",
228 ip!(v6s),
229 "){1,6})|",
230 ":((:",
231 ip!(v6s),
232 "){1,7}|:)|",
233 "fe80:(:",
234 ip!(v6s),
235 "){0,4}%[0-9a-zA-Z]{1,}",
236 "::(ffff(:0{1,4}){0,1}:){0,1}",
237 ip!(v4a),
238 "|",
239 "(",
240 ip!(v6s),
241 ":){1,4}:",
242 ip!(v4a),
243 ")([\\s]|[[:punct:]]|$)",
244 )
245);
246
// Credit card numbers: one of the vendor prefixes listed inside the pattern,
// followed by exactly twelve more digits, each of which may be preceded by a
// dash or a whitespace character. The three-digit amex prefix therefore
// yields 15 digits in total, the four-digit prefixes 16. Capture group 1
// spans the whole number; `get_regex_for_rule_type` replaces the full match.
regex!(
    CREDITCARD_REGEX,
    r#"(?x)
    \b(
        (?:  # vendor specific prefixes
              3[47]\d      # amex (no 13-digit version) (length: 15)
            | 4\d{3}       # visa (16-digit version only)
            | 5[1-5]\d\d   # mastercard
            | 65\d\d       # discover network (subset)
            | 6011         # discover network (subset)
        )

        # "wildcard" remainder (allowing dashes in every position because of variable length)
        ([-\s]?\d){12}
    )\b
    "#
);
269
270regex!(
271 PATH_REGEX,
272 r"(?ix)
273 (?:
274 (?:
275 \b(?:[a-zA-Z]:[\\/])?
276 (?:users|home|documents and settings|[^/\\]+[/\\]profiles)[\\/]
277 ) | (?:
278 /(?:home|users)/
279 )
280 )
281 (
282 [^/\\\r\n]+
283 )
284 "
285);
286
// PEM-encoded keys: capture group 1 is the (non-greedy) content between a
// `-----BEGIN ... PRIVATE|PUBLIC KEY-----` header and the matching
// `-----END ... KEY-----` footer. The `s` flag lets `.` span line breaks;
// literal spaces are escaped because of verbose (`x`) mode.
regex!(
    PEM_KEY_REGEX,
    r"(?sx)
        (?:
            -----
            BEGIN[A-Z\ ]+(?:PRIVATE|PUBLIC)\ KEY
            -----
            [\t\ ]*\r?\n?
        )
        (.+?)
        (?:
            \r?\n?
            -----
            END[A-Z\ ]+(?:PRIVATE|PUBLIC)\ KEY
            -----
        )
    "
);
305
// Credentials embedded in URLs: after an optional `scheme:` and `//`, capture
// group 1 is the `user` or `user:password` part that precedes the `@`.
regex!(
    URL_AUTH_REGEX,
    r"(?x)
        \b(?:
            (?:[a-z0-9+-]+:)?//
            ([a-zA-Z0-9%_.-]+(?::[a-zA-Z0-9%_.-]+)?)
        )@
    "
);
315
// US social security numbers in the dashed `NNN-NN-NNNN` form, delimited by
// word boundaries (capture group 1 spans the whole number).
regex!(
    US_SSN_REGEX,
    r"(?x)
        \b(
            [0-9]{3}-
            [0-9]{2}-
            [0-9]{4}
        )\b
    "
);
326
// Case-insensitive list of keywords that mark a key as credential-like.
// The pattern is unanchored, so a keyword anywhere inside a longer string
// matches. `get_regex_for_rule_type` returns it for `RuleType::Password`
// tagged as `PatternType::KeyValue` with the `replace_value` behavior.
regex!(
    PASSWORD_KEY_REGEX,
    r"(?i)(password|secret|passwd|api_key|apikey|auth|credentials|mysql_pwd|privatekey|private_key|token|bearer)"
);
331
#[cfg(test)]
mod tests {
    use super::*;

    /// `PATH_REGEX` must still compile and match a Windows-style user path
    /// when rebuilt as a non-Unicode `regex::bytes` regex.
    #[test]
    fn test_userpath_utf8_bytes() {
        let mut builder = regex::bytes::RegexBuilder::new(PATH_REGEX.as_str());
        builder
            .unicode(false)
            .multi_line(false)
            .dot_matches_new_line(true);
        let bytes_regex = builder.build().unwrap();

        let haystack: &[u8] = br"C:\\Users\jane\somefile";
        assert!(bytes_regex.is_match(haystack));
    }
}
349}