relay_pattern/
wildmatch.rs

1use std::num::NonZeroUsize;
2
3use crate::{Literal, Options, Ranges, Token, Tokens};
4
5/// Matches [`Tokens`] against a `haystack` with the provided [`Options`].
6///
7/// This implementation is largely based on the algorithm described by [Kirk J Krauss]
8/// and combining the two loops into a single one and other small modifications to take advantage
9/// of the already pre-processed [`Tokens`] structure and its invariants.
10///
11/// [Kirk J Krauss]: http://developforperformance.com/MatchingWildcards_AnImprovedAlgorithmForBigData.html
12pub fn is_match(haystack: &str, tokens: &Tokens, options: Options) -> bool {
13    match options.case_insensitive {
14        false => is_match_impl::<_, CaseSensitive>(haystack, tokens.as_slice()),
15        true => is_match_impl::<_, CaseInsensitive>(haystack, tokens.as_slice()),
16    }
17}
18
19#[inline(always)]
20fn is_match_impl<T, M>(haystack: &str, tokens: &T) -> bool
21where
22    T: TokenIndex + ?Sized,
23    M: Matcher,
24{
25    // Remainder of the haystack which still needs to be matched.
26    let mut h_current = haystack;
27    // Saved haystack position for backtracking.
28    let mut h_revert = haystack;
29    // Revert index for `tokens`. In case of backtracking we backtrack to this index.
30    //
31    // If `t_revert` is zero, it means there is no currently saved backtracking position.
32    let mut t_revert = 0;
33    // The next token position which needs to be evaluted.
34    let mut t_next = 0;
35
36    macro_rules! advance {
37        ($len:expr) => {{
38            h_current = &h_current[$len..];
39            true
40        }};
41    }
42
43    // Empty glob never matches.
44    if tokens.is_empty() {
45        return false;
46    }
47
48    while t_next != tokens.len() || !h_current.is_empty() {
49        let matched = if t_next == tokens.len() {
50            false
51        } else {
52            let token = &tokens[t_next];
53            t_next += 1;
54
55            match token {
56                Token::Literal(literal) => match M::is_prefix(h_current, literal) {
57                    Some(n) => advance!(n),
58                    // The literal does not match, but it may match after backtracking.
59                    // TODO: possible optimization: if the literal cannot possibly match anymore
60                    // because it is too long for the remaining haystack, we can immediately return
61                    // no match here.
62                    None => false,
63                },
64                Token::Any(n) => {
65                    advance!(match n_chars_to_bytes(*n, h_current) {
66                        Some(n) => n,
67                        // Not enough characters in the haystack remaining,
68                        // there cannot be any other possible match.
69                        None => return false,
70                    });
71                    true
72                }
73                Token::Wildcard => {
74                    // `ab*c*` matches `abcd`.
75                    if t_next == tokens.len() {
76                        return true;
77                    }
78
79                    t_revert = t_next;
80
81                    match skip_to_token::<_, M>(tokens, t_next, h_current) {
82                        Some((tokens, revert, remaining)) => {
83                            t_next += tokens;
84                            h_revert = revert;
85                            h_current = remaining;
86                        }
87                        None => return false,
88                    };
89
90                    true
91                }
92                Token::Class { negated, ranges } => match h_current.chars().next() {
93                    Some(next) => {
94                        M::ranges_match(next, *negated, ranges) && advance!(next.len_utf8())
95                    }
96                    None => false,
97                },
98                Token::Alternates(alternates) => {
99                    // TODO: should we make this iterative instead of recursive?
100                    let matches = alternates.iter().any(|alternate| {
101                        let tokens = tokens.with_alternate(t_next, alternate.as_slice());
102                        is_match_impl::<_, M>(h_current, &tokens)
103                    });
104
105                    // The brace match already matches to the end, if it is successful we can end right here.
106                    if matches {
107                        return true;
108                    }
109                    // No match, allow for backtracking.
110                    false
111                }
112                Token::Optional(optional) => {
113                    let optional = tokens.with_alternate(t_next, optional.as_slice());
114                    if is_match_impl::<_, M>(h_current, &optional) {
115                        // There is a match with the optional token, we're done.
116                        return true;
117                    }
118                    // Continue on without the optional token.
119                    true
120                }
121            }
122        };
123
124        if !matched {
125            if t_revert == 0 {
126                // No backtracking necessary, no star encountered.
127                // Didn't match and no backtracking -> no match.
128                return false;
129            }
130            h_current = h_revert;
131            t_next = t_revert;
132
133            // Backtrack to the previous location +1 character.
134            advance!(match n_chars_to_bytes(NonZeroUsize::MIN, h_current) {
135                Some(n) => n,
136                None => return false,
137            });
138
139            match skip_to_token::<_, M>(tokens, t_next, h_current) {
140                Some((tokens, revert, remaining)) => {
141                    t_next += tokens;
142                    h_revert = revert;
143                    h_current = remaining;
144                }
145                None => return false,
146            };
147        }
148    }
149
150    true
151}
152
153/// Bundles necessary matchers for [`is_match_impl`].
154trait Matcher {
155    /// Returns the length of the `needle` in the `haystack` if the `needle` is a prefix of `haystack`.
156    fn is_prefix(haystack: &str, needle: &Literal) -> Option<usize>;
157    /// Searches for the `needle` in the `haystack` and returns the index of the start of the match
158    /// and the length of match or `None` if the `needle` is not contained in the `haystack`.
159    fn find(haystack: &str, needle: &Literal) -> Option<(usize, usize)>;
160    /// Returns `true` if the char `c` is contained within `ranges`.
161    fn ranges_match(c: char, negated: bool, ranges: &Ranges) -> bool;
162    /// Searches for the first char in the `haystack` that is contained in one of the `ranges`.
163    ///
164    /// Returns the offset in bytes and matching `char` if the range is contained within the
165    /// `haystack`.
166    #[inline(always)]
167    fn ranges_find(haystack: &str, negated: bool, ranges: &Ranges) -> Option<(usize, char)> {
168        // TODO: possibly optimize range finding.
169        // TODO: possibly optimize with `memchr{1,2,3}` for short ranges.
170        haystack
171            .char_indices()
172            .find(|&(_, c)| Self::ranges_match(c, negated, ranges))
173    }
174}
175
176/// A case sensitive [`Matcher`].
177struct CaseSensitive;
178
179impl Matcher for CaseSensitive {
180    #[inline(always)]
181    fn is_prefix(haystack: &str, needle: &Literal) -> Option<usize> {
182        let needle = needle.as_case_converted_bytes();
183        memchr::arch::all::is_prefix(haystack.as_bytes(), needle).then_some(needle.len())
184    }
185
186    #[inline(always)]
187    fn find(haystack: &str, needle: &Literal) -> Option<(usize, usize)> {
188        let needle = needle.as_case_converted_bytes();
189        memchr::memmem::find(haystack.as_bytes(), needle).map(|offset| (offset, needle.len()))
190    }
191
192    #[inline(always)]
193    fn ranges_match(c: char, negated: bool, ranges: &Ranges) -> bool {
194        ranges.contains(c) ^ negated
195    }
196}
197
198/// A case insensitive [`Matcher`].
199struct CaseInsensitive;
200
201impl Matcher for CaseInsensitive {
202    #[inline(always)]
203    fn is_prefix(haystack: &str, needle: &Literal) -> Option<usize> {
204        // We can safely assume `needle` is already full lowercase. This transformation is done on
205        // token creation based on the options.
206        //
207        // The haystack cannot be converted to full lowercase to not break class matches on
208        // uppercase unicode characters which would produce multiple lowercase characters.
209        //
210        // TODO: benchmark if allocation free is better/faster.
211        let needle = needle.as_case_converted_bytes();
212        let lower_haystack = haystack.to_lowercase();
213
214        memchr::arch::all::is_prefix(lower_haystack.as_bytes(), needle)
215            .then(|| recover_offset_len(haystack, 0, needle.len()).1)
216    }
217
218    #[inline(always)]
219    fn find(haystack: &str, needle: &Literal) -> Option<(usize, usize)> {
220        // TODO: implement manual lowercase which remembers if there were 'special' unicode
221        // conversion involved, if not, there is no recovery necessary.
222        // TODO: benchmark if a lut from offset -> original offset makes sense.
223        // TODO: benchmark allocation free and search with proper case insensitive search.
224        let needle = needle.as_case_converted_bytes();
225        let lower_haystack = haystack.to_lowercase();
226
227        let offset = memchr::memmem::find(lower_haystack.as_bytes(), needle)?;
228
229        // `offset` now points into the lowercase converted string, but this may not match the
230        // offset in the original string. Time to recover the index.
231        Some(recover_offset_len(haystack, offset, offset + needle.len()))
232    }
233
234    #[inline(always)]
235    fn ranges_match(c: char, negated: bool, ranges: &Ranges) -> bool {
236        let matches = exactly_one(c.to_lowercase()).is_some_and(|c| ranges.contains(c))
237            || exactly_one(c.to_uppercase()).is_some_and(|c| ranges.contains(c));
238        matches ^ negated
239    }
240}
241
242/// Efficiently skips to the next matching possible match after a wildcard.
243///
244/// Tokens must be indexable with `t_next`.
245///
246/// Returns `None` if there is no match and the matching can be aborted.
247/// Otherwise returns the amount of tokens consumed, the new save point to backtrack to
248/// and the remaining haystack.
249#[inline(always)]
250fn skip_to_token<'a, T, M>(
251    tokens: &T,
252    t_next: usize,
253    haystack: &'a str,
254) -> Option<(usize, &'a str, &'a str)>
255where
256    T: TokenIndex + ?Sized,
257    M: Matcher,
258{
259    let next = &tokens[t_next];
260
261    // TODO: optimize other cases like:
262    //  - `[Any(n), Literal(_), ..]` (skip + literal find)
263    //  - `[Any(n)]` (minimum remaining length)
264    Some(match next {
265        Token::Literal(literal) => {
266            match M::find(haystack, literal) {
267                // We cannot use `offset + literal.len()` as the revert position
268                // to not discard overlapping matches.
269                Some((offset, len)) => (1, &haystack[offset..], &haystack[offset + len..]),
270                // The literal does not exist in the remaining slice.
271                // No backtracking necessary, we won't ever find it.
272                None => return None,
273            }
274        }
275        Token::Class { negated, ranges } => {
276            match M::ranges_find(haystack, *negated, ranges) {
277                Some((offset, c)) => (1, &haystack[offset..], &haystack[offset + c.len_utf8()..]),
278                // None of the remaining characters matches this class.
279                // No backtracking necessary, we won't ever find it.
280                None => return None,
281            }
282        }
283        _ => {
284            // We didn't consume and match the token, revert to the previous state and
285            // let the generic matching with slower backtracking handle the token.
286            (0, haystack, haystack)
287        }
288    })
289}
290
291/// Calculates a byte offset of the next `n` chars in the string `s`.
292///
293/// Returns `None` if the string is too short.
294#[inline(always)]
295fn n_chars_to_bytes(n: NonZeroUsize, s: &str) -> Option<usize> {
296    // Fast path check, if there are less bytes than characters.
297    if n.get() > s.len() {
298        return None;
299    }
300    s.char_indices()
301        .nth(n.get() - 1)
302        .map(|(i, c)| i + c.len_utf8())
303}
304
305/// Returns `Some` if `iter` contains exactly one element.
306#[inline(always)]
307fn exactly_one<T>(mut iter: impl Iterator<Item = T>) -> Option<T> {
308    let item = iter.next()?;
309    match iter.next() {
310        Some(_) => None,
311        None => Some(item),
312    }
313}
314
315/// Recovers offset and length from a case insensitive search in `haystack` using a lowecase
316/// haystack and a lowercase needle.
317///
318/// `lower_offset` is the offset of the match in the lowercase haystack.
319/// `lower_end` is the end offset of the match in the lowercase haystack.
320///
321/// Returns the recovered offset and length.
322#[inline(always)]
323fn recover_offset_len(
324    haystack: &str,
325    lower_offset: usize,
326    lower_offset_end: usize,
327) -> (usize, usize) {
328    haystack
329        .chars()
330        .try_fold((0, 0, 0), |(lower, h_offset, h_len), c| {
331            let lower = lower + c.to_lowercase().map(|c| c.len_utf8()).sum::<usize>();
332
333            if lower <= lower_offset {
334                Ok((lower, h_offset + c.len_utf8(), 0))
335            } else if lower <= lower_offset_end {
336                Ok((lower, h_offset, h_len + c.len_utf8()))
337            } else {
338                Err((h_offset, h_len))
339            }
340        })
341        .map_or_else(|e| e, |(_, offset, len)| (offset, len))
342}
343
344/// Minimum requirements to process tokens during matching.
345///
346/// This is very closely coupled to [`is_match_impl`] and the process
347/// of also matching alternate branches of a pattern. We use a trait here
348/// to make use of monomorphization to make sure the alternate matching
349/// can be inlined.
350trait TokenIndex: std::ops::Index<usize, Output = Token> + std::fmt::Debug {
351    /// The type returned from [`Self::with_alternate`].
352    ///
353    /// We need an associated type here to not produce and endlessly recursive
354    /// type. Alternates are only allowed for the first 'level' (can't be nested),
355    /// which allows us to avoid the recursion.
356    type WithAlternates<'a>: TokenIndex
357    where
358        Self: 'a;
359
360    fn len(&self) -> usize;
361
362    #[inline(always)]
363    fn is_empty(&self) -> bool {
364        self.len() == 0
365    }
366
367    /// Merges the current instance with alternate tokens and returns [`Self::WithAlternates`].
368    fn with_alternate<'a>(
369        &'a self,
370        offset: usize,
371        alternate: &'a [Token],
372    ) -> Self::WithAlternates<'a>;
373}
374
375impl TokenIndex for [Token] {
376    type WithAlternates<'a> = AltAndTokens<'a>;
377
378    #[inline(always)]
379    fn len(&self) -> usize {
380        self.len()
381    }
382
383    #[inline(always)]
384    fn with_alternate<'a>(
385        &'a self,
386        offset: usize,
387        alternate: &'a [Token],
388    ) -> Self::WithAlternates<'a> {
389        AltAndTokens {
390            alternate,
391            tokens: &self[offset..],
392        }
393    }
394}
395
396/// A [`TokenIndex`] implementation which has been combined with tokens from an alternation.
397///
398/// Each alternate in the pattern creates a new individual matching branch with the alternate
399/// currently being matched, the remaining tokens of the original pattern and the remaining
400/// haystack which is yet to be matched.
401///
402/// If the resulting submatch matches the total pattern matches, if it does not match
403/// another branch is tested.
404#[derive(Debug)]
405struct AltAndTokens<'a> {
406    /// The alternation tokens.
407    alternate: &'a [Token],
408    /// The remaining tokens of the original pattern.
409    tokens: &'a [Token],
410}
411
412impl TokenIndex for AltAndTokens<'_> {
413    // Type here does not matter, we implement `with_alternate` by returning the never type.
414    // It just needs to satisfy the `TokenIndex` trait bound.
415    type WithAlternates<'b>
416        = AltAndTokens<'b>
417    where
418        Self: 'b;
419
420    #[inline(always)]
421    fn len(&self) -> usize {
422        self.alternate.len() + self.tokens.len()
423    }
424
425    fn with_alternate<'b>(
426        &'b self,
427        offset: usize,
428        alternate: &'b [Token],
429    ) -> Self::WithAlternates<'b> {
430        if offset < self.alternate.len() {
431            unreachable!("No nested alternates")
432        }
433        AltAndTokens {
434            alternate,
435            tokens: &self.tokens[offset - self.alternate.len()..],
436        }
437    }
438}
439
440impl std::ops::Index<usize> for AltAndTokens<'_> {
441    type Output = Token;
442
443    fn index(&self, index: usize) -> &Self::Output {
444        if index < self.alternate.len() {
445            &self.alternate[index]
446        } else {
447            &self.tokens[index - self.alternate.len()]
448        }
449    }
450}
451
452// Just some tests, full test suite is run on globs not tokens.
453#[cfg(test)]
454mod tests {
455    use super::*;
456    use crate::{Range, Ranges};
457
458    fn literal(s: &str) -> Literal {
459        Literal::new(s.to_owned(), Default::default())
460    }
461
462    fn literal_ci(s: &str) -> Literal {
463        Literal::new(
464            s.to_owned(),
465            Options {
466                case_insensitive: true,
467            },
468        )
469    }
470
471    fn range(start: char, end: char) -> Ranges {
472        Ranges::Single(Range { start, end })
473    }
474
475    #[test]
476    fn test_exactly_one() {
477        assert_eq!(exactly_one([].into_iter()), None::<i32>);
478        assert_eq!(exactly_one([1].into_iter()), Some(1));
479        assert_eq!(exactly_one([1, 2].into_iter()), None);
480        assert_eq!(exactly_one([1, 2, 3].into_iter()), None);
481    }
482
483    #[test]
484    fn test_literal() {
485        let mut tokens = Tokens::default();
486        tokens.push(Token::Literal(literal("abc")));
487        assert!(is_match("abc", &tokens, Default::default()));
488        assert!(!is_match("abcd", &tokens, Default::default()));
489        assert!(!is_match("bc", &tokens, Default::default()));
490    }
491
492    #[test]
493    fn test_class() {
494        let mut tokens = Tokens::default();
495        tokens.push(Token::Literal(literal("a")));
496        tokens.push(Token::Class {
497            negated: false,
498            ranges: Ranges::Single(Range::single('b')),
499        });
500        tokens.push(Token::Literal(literal("c")));
501        assert!(is_match("abc", &tokens, Default::default()));
502        assert!(!is_match("aac", &tokens, Default::default()));
503        assert!(!is_match("abbc", &tokens, Default::default()));
504    }
505
506    #[test]
507    fn test_class_negated() {
508        let mut tokens = Tokens::default();
509        tokens.push(Token::Literal(literal("a")));
510        tokens.push(Token::Class {
511            negated: true,
512            ranges: Ranges::Single(Range::single('b')),
513        });
514        tokens.push(Token::Literal(literal("c")));
515        assert!(!is_match("abc", &tokens, Default::default()));
516        assert!(is_match("aac", &tokens, Default::default()));
517        assert!(!is_match("abbc", &tokens, Default::default()));
518    }
519
520    #[test]
521    fn test_any_one() {
522        let mut tokens = Tokens::default();
523        tokens.push(Token::Literal(literal("a")));
524        tokens.push(Token::Any(NonZeroUsize::MIN));
525        tokens.push(Token::Literal(literal("c")));
526        assert!(is_match("abc", &tokens, Default::default()));
527        assert!(is_match("aඞc", &tokens, Default::default()));
528        assert!(!is_match("abbc", &tokens, Default::default()));
529    }
530
531    #[test]
532    fn test_any_many() {
533        let mut tokens = Tokens::default();
534        tokens.push(Token::Literal(literal("a")));
535        tokens.push(Token::Any(NonZeroUsize::new(2).unwrap()));
536        tokens.push(Token::Literal(literal("d")));
537        assert!(is_match("abcd", &tokens, Default::default()));
538        assert!(is_match("aඞ_d", &tokens, Default::default()));
539        assert!(!is_match("abbc", &tokens, Default::default()));
540        assert!(!is_match("abcde", &tokens, Default::default()));
541        assert!(!is_match("abc", &tokens, Default::default()));
542        assert!(!is_match("bcd", &tokens, Default::default()));
543    }
544
545    #[test]
546    fn test_any_unicode() {
547        let mut tokens = Tokens::default();
548        tokens.push(Token::Literal(literal("a")));
549        tokens.push(Token::Any(NonZeroUsize::new(3).unwrap()));
550        tokens.push(Token::Literal(literal("a")));
551        assert!(is_match("abbba", &tokens, Default::default()));
552        assert!(is_match("aඞbඞa", &tokens, Default::default()));
553        // `i̇` is `i\u{307}`
554        assert!(is_match("aඞi̇a", &tokens, Default::default()));
555        assert!(!is_match("aඞi̇ඞa", &tokens, Default::default()));
556    }
557
558    #[test]
559    fn test_wildcard_start() {
560        let mut tokens = Tokens::default();
561        tokens.push(Token::Wildcard);
562        tokens.push(Token::Literal(literal("b")));
563        assert!(is_match("b", &tokens, Default::default()));
564        assert!(is_match("aaaab", &tokens, Default::default()));
565        assert!(is_match("ඞb", &tokens, Default::default()));
566        assert!(is_match("bbbbbbbbb", &tokens, Default::default()));
567        assert!(!is_match("", &tokens, Default::default()));
568        assert!(!is_match("a", &tokens, Default::default()));
569        assert!(!is_match("aa", &tokens, Default::default()));
570        assert!(!is_match("aaa", &tokens, Default::default()));
571        assert!(!is_match("ba", &tokens, Default::default()));
572    }
573
574    #[test]
575    fn test_wildcard_end() {
576        let mut tokens = Tokens::default();
577        tokens.push(Token::Literal(literal("a")));
578        tokens.push(Token::Wildcard);
579        assert!(is_match("a", &tokens, Default::default()));
580        assert!(is_match("aaaab", &tokens, Default::default()));
581        assert!(is_match("aඞ", &tokens, Default::default()));
582        assert!(!is_match("", &tokens, Default::default()));
583        assert!(!is_match("b", &tokens, Default::default()));
584        assert!(!is_match("bb", &tokens, Default::default()));
585        assert!(!is_match("bbb", &tokens, Default::default()));
586        assert!(!is_match("ba", &tokens, Default::default()));
587    }
588
589    #[test]
590    fn test_wildcard_end_unicode_case_insensitive() {
591        let options = Options {
592            case_insensitive: true,
593        };
594        let mut tokens = Tokens::default();
595        tokens.push(Token::Literal(Literal::new("İ".to_owned(), options)));
596        tokens.push(Token::Wildcard);
597
598        assert!(is_match("İ___", &tokens, options));
599        assert!(is_match("İ", &tokens, options));
600        assert!(is_match("i̇", &tokens, options));
601        assert!(is_match("i\u{307}___", &tokens, options));
602        assert!(!is_match("i____", &tokens, options));
603    }
604
605    #[test]
606    fn test_alternate() {
607        let mut tokens = Tokens::default();
608        tokens.push(Token::Literal(literal("a")));
609        tokens.push(Token::Alternates(vec![
610            {
611                let mut tokens = Tokens::default();
612                tokens.push(Token::Literal(literal("b")));
613                tokens
614            },
615            {
616                let mut tokens = Tokens::default();
617                tokens.push(Token::Literal(literal("c")));
618                tokens
619            },
620        ]));
621        tokens.push(Token::Literal(literal("a")));
622        assert!(is_match("aba", &tokens, Default::default()));
623        assert!(is_match("aca", &tokens, Default::default()));
624        assert!(!is_match("ada", &tokens, Default::default()));
625    }
626
627    #[test]
628    fn test_optional() {
629        let mut tokens = Tokens::default();
630
631        tokens.push(Token::Optional(Tokens(vec![Token::Literal(literal(
632            "foo",
633        ))])));
634        assert!(is_match("foo", &tokens, Default::default()));
635        assert!(is_match("", &tokens, Default::default()));
636    }
637
638    #[test]
639    fn test_optional_alternate() {
640        let mut tokens = Tokens::default();
641        let alternates = Token::Alternates(vec![
642            {
643                let mut tokens = Tokens::default();
644                tokens.push(Token::Literal(literal("foo")));
645                tokens
646            },
647            {
648                let mut tokens = Tokens::default();
649                tokens.push(Token::Literal(literal("bar")));
650                tokens
651            },
652        ]);
653
654        tokens.push(Token::Optional(Tokens(vec![alternates])));
655        assert!(is_match("foo", &tokens, Default::default()));
656        assert!(is_match("bar", &tokens, Default::default()));
657        assert!(is_match("", &tokens, Default::default()));
658    }
659
660    #[test]
661    fn test_matcher_case_sensitive_prefix() {
662        macro_rules! test {
663            ($haystack:expr, $needle:expr, $result:expr) => {
664                assert_eq!(
665                    CaseSensitive::is_prefix($haystack, &literal($needle)),
666                    $result
667                );
668            };
669        }
670
671        test!("foobar", "f", Some(1));
672        test!("foobar", "foo", Some(3));
673        test!("foobar", "foobar", Some(6));
674        test!("foobar", "oobar", None);
675        test!("foobar", "foobar2", None);
676        test!("İ", "İ", Some(2));
677        test!("İ", "i", None);
678        test!("i", "İ", None);
679        test!("i", "i", Some(1));
680        test!("i̇", "i", Some(1));
681        test!("i̇", "i\u{307}", Some(3));
682        test!("i̇x", "i\u{307}", Some(3));
683        test!("i̇x", "i\u{307}x", Some(4));
684        test!("i̇x", "i\u{307}_", None);
685    }
686
687    #[test]
688    fn test_matcher_case_sensitive_find() {
689        macro_rules! test {
690            ($haystack:expr, $needle:expr, $result:expr) => {
691                assert_eq!(CaseSensitive::find($haystack, &literal($needle)), $result);
692            };
693        }
694
695        test!("foobar", "f", Some((0, 1)));
696        test!("foobar", "foo", Some((0, 3)));
697        test!("foobar", "foobar", Some((0, 6)));
698        test!("foobar", "bar", Some((3, 3)));
699        test!("foobar", "oobar", Some((1, 5)));
700        test!("foobar", "foobar2", None);
701        test!("İ", "İ", Some((0, 2)));
702        test!("i", "i", Some((0, 1)));
703        test!("i̇", "i\u{307}", Some((0, 3)));
704        test!("i̇x", "i\u{307}x", Some((0, 4)));
705        test!("i̇x", "i\u{307}_", None);
706        test!("xi̇x", "i\u{307}", Some((1, 3)));
707        test!("xi̇ඞi̇x", "ඞ", Some((4, 3)));
708        test!("xi̇ඞi̇x", "ඞi̇", Some((4, 6)));
709    }
710
711    #[test]
712    fn test_matcher_case_sensitive_ranges_match() {
713        macro_rules! test {
714            ($c:expr, $negated:expr, [$start:literal - $end:literal], $result:expr) => {
715                assert_eq!(
716                    CaseSensitive::ranges_match($c, $negated, &range($start, $end)),
717                    $result
718                );
719            };
720        }
721
722        test!('a', false, ['a' - 'a'], true);
723        test!('a', true, ['a' - 'a'], false);
724        test!('b', false, ['a' - 'a'], false);
725        test!('b', true, ['a' - 'a'], true);
726        test!('b', false, ['a' - 'c'], true);
727        test!('b', false, ['b' - 'c'], true);
728        test!('b', false, ['a' - 'b'], true);
729
730        test!('A', false, ['a' - 'a'], false);
731        test!('ඞ', false, ['ඞ' - 'ඞ'], true);
732    }
733
734    #[test]
735    fn test_matcher_case_sensitive_ranges_find() {
736        macro_rules! test {
737            ($haystack:expr, $negated:expr, [$start:literal - $end:literal], $result:expr) => {
738                assert_eq!(
739                    CaseSensitive::ranges_find($haystack, $negated, &range($start, $end)),
740                    $result
741                );
742            };
743        }
744
745        test!("ඞaඞ", false, ['a' - 'a'], Some((3, 'a')));
746        test!("a", true, ['a' - 'a'], None);
747        test!("ඞaඞ", true, ['a' - 'a'], Some((0, 'ඞ')));
748        test!("aඞaඞ", true, ['a' - 'a'], Some((1, 'ඞ')));
749        test!("ඞbඞ", false, ['a' - 'a'], None);
750        test!("ඞbඞ", true, ['ඞ' - 'ඞ'], Some((3, 'b')));
751        test!("ඞbඞ", false, ['a' - 'c'], Some((3, 'b')));
752        test!("ඞbඞ", false, ['b' - 'c'], Some((3, 'b')));
753        test!("ඞbඞ", false, ['a' - 'b'], Some((3, 'b')));
754        test!("AAAAA", false, ['a' - 'a'], None);
755        test!("aaaaaaabb", true, ['a' - 'a'], Some((7, 'b')));
756        test!("AaaaaaAbb", false, ['b' - 'b'], Some((7, 'b')));
757    }
758
759    #[test]
760    fn test_matcher_case_insensitive_prefix() {
761        macro_rules! test {
762            ($haystack:expr, $needle:expr, $result:expr) => {
763                assert_eq!(
764                    CaseInsensitive::is_prefix($haystack, &literal_ci($needle)),
765                    $result
766                );
767            };
768        }
769
770        test!("foobar", "f", Some(1));
771        test!("foobar", "F", Some(1));
772        test!("fOobar", "foo", Some(3));
773        test!("fooBAR", "foobar", Some(6));
774        test!("foobar", "oobar", None);
775        test!("FOOBAR", "oobar", None);
776        test!("foobar", "foobar2", None);
777        test!("İ", "İ", Some(2));
778        test!("İ", "i", Some(0));
779        test!("İ", "i̇", Some(2));
780        test!("i", "İ", None);
781        test!("i", "i", Some(1));
782        test!("i̇", "i", Some(1));
783        test!("i̇", "i\u{307}", Some(3));
784        test!("i̇x", "i\u{307}", Some(3));
785        test!("i̇x", "i\u{307}x", Some(4));
786        test!("i̇x", "i\u{307}_", None);
787    }
788
789    #[test]
790    fn test_matcher_case_insensitive_find() {
791        macro_rules! test {
792            ($haystack:expr, $needle:expr, $result:expr) => {
793                assert_eq!(
794                    CaseInsensitive::find($haystack, &literal_ci($needle)),
795                    $result
796                );
797            };
798        }
799
800        test!("Foobar", "f", Some((0, 1)));
801        test!("foObar", "FOO", Some((0, 3)));
802        test!("foObar", "Foobar", Some((0, 6)));
803        test!("foObar", "bar", Some((3, 3)));
804        test!("foObarx", "bar", Some((3, 3)));
805        test!("foObarbarbar", "bar", Some((3, 3)));
806        test!("foObar", "Oobar", Some((1, 5)));
807        test!("foObar", "Foobar2", None);
808        test!("İ", "İ", Some((0, 2)));
809        test!("i", "i", Some((0, 1)));
810        test!("i̇", "i\u{307}", Some((0, 3)));
811        test!("i̇x", "i\u{307}x", Some((0, 4)));
812        test!("i̇x", "i\u{307}_", None);
813        test!("xi̇x", "i\u{307}", Some((1, 3)));
814        test!("xi̇ඞi̇x", "ඞ", Some((4, 3)));
815        test!("xi̇ඞi̇x", "ඞi̇", Some((4, 6)));
816        test!("xi̇ඞİx", "ඞi̇", Some((4, 5)));
817    }
818
819    #[test]
820    fn test_matcher_case_insensitive_ranges_match() {
821        macro_rules! test {
822            ($c:expr, $negated:expr, [$start:literal - $end:literal], $result:expr) => {
823                assert_eq!(
824                    CaseInsensitive::ranges_match($c, $negated, &range($start, $end)),
825                    $result
826                );
827            };
828        }
829
830        test!('a', false, ['a' - 'a'], true);
831        test!('a', true, ['a' - 'a'], false);
832        test!('b', false, ['a' - 'a'], false);
833        test!('b', true, ['a' - 'a'], true);
834        test!('b', false, ['a' - 'c'], true);
835        test!('b', false, ['b' - 'c'], true);
836        test!('b', false, ['a' - 'b'], true);
837
838        test!('b', false, ['A' - 'A'], false);
839        test!('b', true, ['A' - 'A'], true);
840        test!('b', false, ['A' - 'C'], true);
841        test!('b', false, ['B' - 'C'], true);
842        test!('b', false, ['A' - 'B'], true);
843
844        test!('B', false, ['a' - 'a'], false);
845        test!('B', true, ['a' - 'a'], true);
846        test!('B', false, ['a' - 'c'], true);
847        test!('B', false, ['b' - 'c'], true);
848        test!('B', false, ['a' - 'b'], true);
849
850        test!('ǧ', false, ['Ǧ' - 'Ǧ'], true);
851        test!('Ǧ', false, ['ǧ' - 'ǧ'], true);
852        test!('ǧ', true, ['Ǧ' - 'Ǧ'], false);
853        test!('Ǧ', true, ['ǧ' - 'ǧ'], false);
854
855        test!('ඞ', false, ['ඞ' - 'ඞ'], true);
856    }
857
858    #[test]
859    fn test_matcher_case_insensitive_ranges_find() {
860        macro_rules! test {
861            ($haystack:expr, $negated:expr, [$start:literal - $end:literal], $result:expr) => {
862                assert_eq!(
863                    CaseInsensitive::ranges_find($haystack, $negated, &range($start, $end)),
864                    $result
865                );
866            };
867        }
868
869        test!("ඞaඞ", false, ['a' - 'a'], Some((3, 'a')));
870        test!("a", true, ['a' - 'a'], None);
871        test!("ඞaඞ", true, ['a' - 'a'], Some((0, 'ඞ')));
872        test!("aඞaඞ", true, ['a' - 'a'], Some((1, 'ඞ')));
873        test!("ඞbඞ", false, ['a' - 'a'], None);
874        test!("ඞbඞ", true, ['ඞ' - 'ඞ'], Some((3, 'b')));
875        test!("ඞbඞ", false, ['a' - 'c'], Some((3, 'b')));
876        test!("ඞbඞ", false, ['b' - 'c'], Some((3, 'b')));
877        test!("ඞbඞ", false, ['a' - 'b'], Some((3, 'b')));
878        test!("AAAAA", false, ['a' - 'a'], Some((0, 'A')));
879        test!("aaaaaaabb", true, ['a' - 'a'], Some((7, 'b')));
880        test!("AaaaaaAbb", false, ['b' - 'b'], Some((7, 'b')));
881
882        test!("ඞBඞ", false, ['a' - 'a'], None);
883        test!("ඞBඞ", true, ['ඞ' - 'ඞ'], Some((3, 'B')));
884        test!("ඞBඞ", false, ['a' - 'c'], Some((3, 'B')));
885        test!("ඞBඞ", false, ['b' - 'c'], Some((3, 'B')));
886        test!("ඞBඞ", false, ['a' - 'b'], Some((3, 'B')));
887
888        test!("ඞbඞ", false, ['A' - 'A'], None);
889        test!("ඞbඞ", true, ['ඞ' - 'ඞ'], Some((3, 'b')));
890        test!("ඞbඞ", false, ['A' - 'C'], Some((3, 'b')));
891        test!("ඞbඞ", false, ['B' - 'C'], Some((3, 'b')));
892        test!("ඞbඞ", false, ['A' - 'B'], Some((3, 'b')));
893
894        test!("fඞoǧbar", false, ['ǧ' - 'ǧ'], Some((5, 'ǧ')));
895        test!("fඞoǧbar", false, ['Ǧ' - 'Ǧ'], Some((5, 'ǧ')));
896        test!("fඞoǦbar", false, ['ǧ' - 'ǧ'], Some((5, 'Ǧ')));
897    }
898}