relay_cabi/
glob.rs

1//! Binary glob pattern matching for the C-ABI.
2
3use std::borrow::Cow;
4use std::num::NonZeroUsize;
5
6use globset::GlobBuilder;
7use lru::LruCache;
8use once_cell::sync::Lazy;
9use regex::bytes::{Regex, RegexBuilder};
10use std::sync::{Mutex, PoisonError};
11
12use crate::{RelayBuf, RelayStr};
13
/// Bit flags that control the globbing behavior of [`relay_is_glob_match`].
///
/// Each variant occupies a distinct bit; [`relay_is_glob_match`] converts the received value to
/// `u32` and tests every bit individually.
#[repr(u32)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum GlobFlags {
    /// When enabled `**` matches over path separators and `*` does not.
    DoubleStar = 1,
    /// Enables case insensitive path matching.
    CaseInsensitive = 2,
    /// Enables path normalization (backslashes are treated as forward slashes).
    PathNormalize = 4,
    /// Allows wildcards to match newline characters.
    AllowNewline = 8,
}
26
27/// Performs a glob operation on bytes.
28///
29/// Returns `true` if the glob matches, `false` otherwise.
30#[unsafe(no_mangle)]
31#[relay_ffi::catch_unwind]
32pub unsafe extern "C" fn relay_is_glob_match(
33    value: *const RelayBuf,
34    pat: *const RelayStr,
35    flags: GlobFlags,
36) -> bool {
37    let mut options = GlobOptions::default();
38    let flags = flags as u32;
39    if (flags & GlobFlags::DoubleStar as u32) != 0 {
40        options.double_star = true;
41    }
42    if (flags & GlobFlags::CaseInsensitive as u32) != 0 {
43        options.case_insensitive = true;
44    }
45    if (flags & GlobFlags::PathNormalize as u32) != 0 {
46        options.path_normalize = true;
47    }
48    if (flags & GlobFlags::AllowNewline as u32) != 0 {
49        options.allow_newline = true;
50    }
51    glob_match_bytes(
52        unsafe { (*value).as_bytes() },
53        unsafe { (*pat).as_str() },
54        options,
55    )
56}
57
58/// LRU cache for [`Regex`]s in relation to [`GlobOptions`] and the provided string pattern.
59static GLOB_CACHE: Lazy<Mutex<LruCache<(GlobOptions, String), Regex>>> =
60    Lazy::new(|| Mutex::new(LruCache::new(NonZeroUsize::new(500).unwrap())));
61
/// Matching options used internally by the globber.
///
/// Every option defaults to `false`. The struct also forms part of the regex cache key, which
/// is why it is `Eq` and `Hash`.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
struct GlobOptions {
    /// When set, `*` stops at path separators and only `**` matches across them.
    pub double_star: bool,
    /// Matches paths without regard to letter case.
    pub case_insensitive: bool,
    /// Replaces backslashes with forward slashes in both value and pattern before matching.
    pub path_normalize: bool,
    /// Lets wildcards match newline characters.
    pub allow_newline: bool,
}
74
75fn translate_pattern(pat: &str, options: GlobOptions) -> Option<Regex> {
76    let mut builder = GlobBuilder::new(pat);
77    builder.case_insensitive(options.case_insensitive);
78    builder.literal_separator(options.double_star);
79    let glob = builder.build().ok()?;
80    RegexBuilder::new(glob.regex())
81        .dot_matches_new_line(options.allow_newline)
82        .build()
83        .ok()
84}
85
86/// Performs a glob operation on bytes.
87///
88/// Returns `true` if the glob matches, `false` otherwise.
89fn glob_match_bytes(value: &[u8], pat: &str, options: GlobOptions) -> bool {
90    let (value, pat) = if options.path_normalize {
91        (
92            Cow::Owned(
93                value
94                    .iter()
95                    .map(|&x| if x == b'\\' { b'/' } else { x })
96                    .collect(),
97            ),
98            pat.replace('\\', "/"),
99        )
100    } else {
101        (Cow::Borrowed(value), pat.to_string())
102    };
103    let key = (options, pat);
104    let mut cache = GLOB_CACHE.lock().unwrap_or_else(PoisonError::into_inner);
105
106    if let Some(pattern) = cache.get(&key) {
107        pattern.is_match(&value)
108    } else if let Some(pattern) = translate_pattern(&key.1, options) {
109        let rv = pattern.is_match(&value);
110        cache.put(key, pattern);
111        rv
112    } else {
113        false
114    }
115}
116
#[cfg(test)]
mod tests {
    use super::*;

    /// Convenience wrapper so the cases below can pass the value as a `&str`.
    fn glob_match(value: &str, pat: &str, options: GlobOptions) -> bool {
        glob_match_bytes(value.as_bytes(), pat, options)
    }

    #[test]
    fn test_globs() {
        // Arguments: value, pattern, expected outcome, and the options that differ from the
        // defaults.
        macro_rules! test_glob {
            ($input:expr, $glob:expr, $expected:expr, {$($field:ident: $setting:expr),*}) => {{
                #[allow(clippy::needless_update)]
                let options = GlobOptions { $($field: $setting,)* ..Default::default() };
                let verdict = if $expected { "matches" } else { "does not match" };
                assert!(
                    glob_match($input, $glob, options) == $expected,
                    "expected that {} {} {} with options {:?}",
                    $glob,
                    verdict,
                    $input,
                    &options,
                );
            }}
        }

        test_glob!("hello.py", "*.py", true, {});
        test_glob!("hello.py", "*.js", false, {});
        test_glob!("foo/hello.py", "*.py", true, {});
        test_glob!("foo/hello.py", "*.py", false, {double_star: true});
        test_glob!("foo/hello.py", "**/*.py", true, {double_star: true});
        test_glob!("foo/hello.PY", "**/*.py", false, {double_star: true});
        test_glob!("foo/hello.PY", "**/*.py", true, {double_star: true, case_insensitive: true});
        test_glob!("foo\\hello\\bar.PY", "foo/**/*.py", false, {double_star: true, case_insensitive: true});
        test_glob!("foo\\hello\\bar.PY", "foo/**/*.py", true, {double_star: true, case_insensitive: true, path_normalize: true});
        test_glob!("foo\nbar", "foo*", false, {});
        test_glob!("foo\nbar", "foo*", true, {allow_newline: true});
        test_glob!("1.18.4.2153-2aa83397b", "1.18.[0-4].*", true, {});

        // A very long value combined with a long run of `*` in the pattern.
        let mut long_string = "x".repeat(1_000_000);
        long_string.push_str(".PY");
        test_glob!(&long_string, "*************************.py", true, {double_star: true, case_insensitive: true, path_normalize: true});
        test_glob!(&long_string, "*************************.js", false, {double_star: true, case_insensitive: true, path_normalize: true});
    }
}