relay_cabi/
glob.rs

1//! Binary glob pattern matching for the C-ABI.
2
3use std::borrow::Cow;
4use std::num::NonZeroUsize;
5
6use globset::GlobBuilder;
7use lru::LruCache;
8use once_cell::sync::Lazy;
9use regex::bytes::{Regex, RegexBuilder};
10use std::sync::{Mutex, PoisonError};
11
12use crate::{RelayBuf, RelayStr};
13
/// Controls the globbing behaviors.
///
/// Every variant has a distinct power-of-two discriminant and the enum is laid out as a `u32`.
/// [`relay_is_glob_match`] converts the received value to `u32` and tests each of these bits
/// independently.
#[repr(u32)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum GlobFlags {
    /// When enabled `**` matches over path separators and `*` does not.
    DoubleStar = 1,
    /// Enables case insensitive path matching.
    CaseInsensitive = 2,
    /// Enables path normalization (backslashes are treated as forward slashes).
    PathNormalize = 4,
    /// Allows wildcards to match newline characters.
    AllowNewline = 8,
}
26
27/// Performs a glob operation on bytes.
28///
29/// Returns `true` if the glob matches, `false` otherwise.
30#[no_mangle]
31#[relay_ffi::catch_unwind]
32pub unsafe extern "C" fn relay_is_glob_match(
33    value: *const RelayBuf,
34    pat: *const RelayStr,
35    flags: GlobFlags,
36) -> bool {
37    let mut options = GlobOptions::default();
38    let flags = flags as u32;
39    if (flags & GlobFlags::DoubleStar as u32) != 0 {
40        options.double_star = true;
41    }
42    if (flags & GlobFlags::CaseInsensitive as u32) != 0 {
43        options.case_insensitive = true;
44    }
45    if (flags & GlobFlags::PathNormalize as u32) != 0 {
46        options.path_normalize = true;
47    }
48    if (flags & GlobFlags::AllowNewline as u32) != 0 {
49        options.allow_newline = true;
50    }
51    glob_match_bytes((*value).as_bytes(), (*pat).as_str(), options)
52}
53
54/// LRU cache for [`Regex`]s in relation to [`GlobOptions`] and the provided string pattern.
55static GLOB_CACHE: Lazy<Mutex<LruCache<(GlobOptions, String), Regex>>> =
56    Lazy::new(|| Mutex::new(LruCache::new(NonZeroUsize::new(500).unwrap())));
57
/// Matching options used by the globber.
///
/// All options default to `false`. The type is `Eq + Hash` because it forms part of the key of
/// the compiled-pattern cache.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
struct GlobOptions {
    /// If set, only `**` may match across path separators; a single `*` may not.
    pub double_star: bool,
    /// If set, matching ignores case.
    pub case_insensitive: bool,
    /// If set, backslashes in both the value and the pattern are replaced with forward slashes
    /// before matching.
    pub path_normalize: bool,
    /// If set, wildcards may match newline characters.
    pub allow_newline: bool,
}
70
71fn translate_pattern(pat: &str, options: GlobOptions) -> Option<Regex> {
72    let mut builder = GlobBuilder::new(pat);
73    builder.case_insensitive(options.case_insensitive);
74    builder.literal_separator(options.double_star);
75    let glob = builder.build().ok()?;
76    RegexBuilder::new(glob.regex())
77        .dot_matches_new_line(options.allow_newline)
78        .build()
79        .ok()
80}
81
82/// Performs a glob operation on bytes.
83///
84/// Returns `true` if the glob matches, `false` otherwise.
85fn glob_match_bytes(value: &[u8], pat: &str, options: GlobOptions) -> bool {
86    let (value, pat) = if options.path_normalize {
87        (
88            Cow::Owned(
89                value
90                    .iter()
91                    .map(|&x| if x == b'\\' { b'/' } else { x })
92                    .collect(),
93            ),
94            pat.replace('\\', "/"),
95        )
96    } else {
97        (Cow::Borrowed(value), pat.to_string())
98    };
99    let key = (options, pat);
100    let mut cache = GLOB_CACHE.lock().unwrap_or_else(PoisonError::into_inner);
101
102    if let Some(pattern) = cache.get(&key) {
103        pattern.is_match(&value)
104    } else if let Some(pattern) = translate_pattern(&key.1, options) {
105        let rv = pattern.is_match(&value);
106        cache.put(key, pattern);
107        rv
108    } else {
109        false
110    }
111}
112
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that matching `$value` against `$pat` yields `$expected`.
    ///
    /// The trailing `{field: value, ...}` list overrides individual [`GlobOptions`] fields; all
    /// other fields keep their defaults.
    macro_rules! test_glob {
        ($value:expr, $pat:expr, $expected:expr, {$($field:ident: $setting:expr),*}) => {{
            #[allow(clippy::needless_update)]
            let options = GlobOptions { $($field: $setting,)* ..Default::default() };
            let value: &str = $value;
            let pat: &str = $pat;
            let expected: bool = $expected;
            let verb = if expected { "matches" } else { "does not match" };
            assert!(
                glob_match_bytes(value.as_bytes(), pat, options) == expected,
                "expected that {} {} {} with options {:?}",
                pat,
                verb,
                value,
                &options,
            );
        }};
    }

    #[test]
    fn test_globs() {
        // Plain wildcards.
        test_glob!("hello.py", "*.py", true, {});
        test_glob!("hello.py", "*.js", false, {});
        test_glob!("foo/hello.py", "*.py", true, {});

        // `double_star` restricts `*` to a single path segment.
        test_glob!("foo/hello.py", "*.py", false, {double_star: true});
        test_glob!("foo/hello.py", "**/*.py", true, {double_star: true});

        // Case sensitivity.
        test_glob!("foo/hello.PY", "**/*.py", false, {double_star: true});
        test_glob!("foo/hello.PY", "**/*.py", true, {double_star: true, case_insensitive: true});

        // Path normalization.
        test_glob!("foo\\hello\\bar.PY", "foo/**/*.py", false, {double_star: true, case_insensitive: true});
        test_glob!("foo\\hello\\bar.PY", "foo/**/*.py", true, {double_star: true, case_insensitive: true, path_normalize: true});

        // Newlines.
        test_glob!("foo\nbar", "foo*", false, {});
        test_glob!("foo\nbar", "foo*", true, {allow_newline: true});

        // Character classes.
        test_glob!("1.18.4.2153-2aa83397b", "1.18.[0-4].*", true, {});

        // A very long input combined with a run of many consecutive stars.
        let mut long_string = "x".repeat(1_000_000);
        long_string.push_str(".PY");
        test_glob!(&long_string, "*************************.py", true, {double_star: true, case_insensitive: true, path_normalize: true});
        test_glob!(&long_string, "*************************.js", false, {double_star: true, case_insensitive: true, path_normalize: true});
    }
}
156}