relay_cabi/
glob.rs

1//! Binary glob pattern matching for the C-ABI.
2
3use std::num::NonZeroUsize;
4use std::{borrow::Cow, sync::LazyLock};
5
6use globset::GlobBuilder;
7use lru::LruCache;
8use regex::bytes::{Regex, RegexBuilder};
9use std::sync::{Mutex, PoisonError};
10
11use crate::{RelayBuf, RelayStr};
12
/// Bit flags that select the globbing behavior of [`relay_is_glob_match`].
///
/// Each variant occupies its own bit; the matcher tests the bits individually.
#[repr(u32)]
pub enum GlobFlags {
    /// `*` stops at path separators; only `**` matches across them.
    DoubleStar = 0x1,
    /// Matches paths without regard to letter case.
    CaseInsensitive = 0x2,
    /// Replaces backslashes with forward slashes in both the value and the pattern before
    /// matching.
    PathNormalize = 0x4,
    /// Lets wildcards match newline characters.
    AllowNewline = 0x8,
}
25
26/// Performs a glob operation on bytes.
27///
28/// Returns `true` if the glob matches, `false` otherwise.
29#[unsafe(no_mangle)]
30#[relay_ffi::catch_unwind]
31pub unsafe extern "C" fn relay_is_glob_match(
32    value: *const RelayBuf,
33    pat: *const RelayStr,
34    flags: GlobFlags,
35) -> bool {
36    let mut options = GlobOptions::default();
37    let flags = flags as u32;
38    if (flags & GlobFlags::DoubleStar as u32) != 0 {
39        options.double_star = true;
40    }
41    if (flags & GlobFlags::CaseInsensitive as u32) != 0 {
42        options.case_insensitive = true;
43    }
44    if (flags & GlobFlags::PathNormalize as u32) != 0 {
45        options.path_normalize = true;
46    }
47    if (flags & GlobFlags::AllowNewline as u32) != 0 {
48        options.allow_newline = true;
49    }
50    glob_match_bytes(
51        unsafe { (*value).as_bytes() },
52        unsafe { (*pat).as_str() },
53        options,
54    )
55}
56
57/// LRU cache for [`Regex`]s in relation to [`GlobOptions`] and the provided string pattern.
58static GLOB_CACHE: LazyLock<Mutex<LruCache<(GlobOptions, String), Regex>>> =
59    LazyLock::new(|| Mutex::new(LruCache::new(NonZeroUsize::new(500).unwrap())));
60
/// Matcher settings, one boolean per [`GlobFlags`] bit.
///
/// The type is `Eq + Hash` because it forms part of the regex cache key.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
struct GlobOptions {
    /// `*` stops at path separators; only `**` matches across them.
    pub double_star: bool,
    /// Matches paths without regard to letter case.
    pub case_insensitive: bool,
    /// Replaces backslashes with forward slashes in both the value and the pattern before
    /// matching.
    pub path_normalize: bool,
    /// Lets wildcards match newline characters.
    pub allow_newline: bool,
}
73
74fn translate_pattern(pat: &str, options: GlobOptions) -> Option<Regex> {
75    let mut builder = GlobBuilder::new(pat);
76    builder.case_insensitive(options.case_insensitive);
77    builder.literal_separator(options.double_star);
78    let glob = builder.build().ok()?;
79    RegexBuilder::new(glob.regex())
80        .dot_matches_new_line(options.allow_newline)
81        .build()
82        .ok()
83}
84
85/// Performs a glob operation on bytes.
86///
87/// Returns `true` if the glob matches, `false` otherwise.
88fn glob_match_bytes(value: &[u8], pat: &str, options: GlobOptions) -> bool {
89    let (value, pat) = if options.path_normalize {
90        (
91            Cow::Owned(
92                value
93                    .iter()
94                    .map(|&x| if x == b'\\' { b'/' } else { x })
95                    .collect(),
96            ),
97            pat.replace('\\', "/"),
98        )
99    } else {
100        (Cow::Borrowed(value), pat.to_owned())
101    };
102    let key = (options, pat);
103    let mut cache = GLOB_CACHE.lock().unwrap_or_else(PoisonError::into_inner);
104
105    if let Some(pattern) = cache.get(&key) {
106        pattern.is_match(&value)
107    } else if let Some(pattern) = translate_pattern(&key.1, options) {
108        let rv = pattern.is_match(&value);
109        cache.put(key, pattern);
110        rv
111    } else {
112        false
113    }
114}
115
#[cfg(test)]
mod tests {
    use super::*;

    /// Matches a UTF-8 `value` against `pat` through [`glob_match_bytes`].
    fn glob_match(value: &str, pat: &str, options: GlobOptions) -> bool {
        glob_match_bytes(value.as_bytes(), pat, options)
    }

    /// Asserts that matching `value` against `pat` with `options` yields `is_match`.
    #[track_caller]
    fn check(value: &str, pat: &str, is_match: bool, options: GlobOptions) {
        let verb = if is_match { "matches" } else { "does not match" };
        assert!(
            glob_match(value, pat, options) == is_match,
            "expected that {} {} {} with options {:?}",
            pat,
            verb,
            value,
            &options,
        );
    }

    #[test]
    fn test_globs() {
        let plain = GlobOptions::default();
        let star = GlobOptions {
            double_star: true,
            ..plain
        };
        let star_nocase = GlobOptions {
            case_insensitive: true,
            ..star
        };
        let star_nocase_norm = GlobOptions {
            path_normalize: true,
            ..star_nocase
        };
        let newline = GlobOptions {
            allow_newline: true,
            ..plain
        };

        check("hello.py", "*.py", true, plain);
        check("hello.py", "*.js", false, plain);
        check("foo/hello.py", "*.py", true, plain);
        check("foo/hello.py", "*.py", false, star);
        check("foo/hello.py", "**/*.py", true, star);
        check("foo/hello.PY", "**/*.py", false, star);
        check("foo/hello.PY", "**/*.py", true, star_nocase);
        check("foo\\hello\\bar.PY", "foo/**/*.py", false, star_nocase);
        check("foo\\hello\\bar.PY", "foo/**/*.py", true, star_nocase_norm);
        check("foo\nbar", "foo*", false, plain);
        check("foo\nbar", "foo*", true, newline);
        check("1.18.4.2153-2aa83397b", "1.18.[0-4].*", true, plain);

        // A very long haystack combined with a run of stars must still be answered correctly.
        let mut long_string = "x".repeat(1_000_000);
        long_string.push_str(".PY");
        check(
            &long_string,
            "*************************.py",
            true,
            star_nocase_norm,
        );
        check(
            &long_string,
            "*************************.js",
            false,
            star_nocase_norm,
        );
    }
}