1use regex::bytes::RegexBuilder as BytesRegexBuilder;
2use regex::{Match, Regex};
3use relay_event_schema::processor::{FieldAttrs, Pii, ProcessingState, ValueType};
4use smallvec::SmallVec;
5use std::borrow::Cow;
6use std::iter::FusedIterator;
7use utf16string::{LittleEndian, WStr};
8
9use crate::compiledconfig::RuleRef;
10use crate::regexes::{get_regex_for_rule_type, ReplaceBehavior};
11use crate::{transform, utils, CompiledPiiConfig, JsonScrubError, JsonScrubVisitor, Redaction};
12
/// Minimum number of characters a decoded UTF-16 segment must contain.
///
/// `WStrSegmentIter` skips every valid UTF-16LE run that has fewer characters than this.
const MIN_STRING_LEN: usize = 5;
19
20fn apply_regex_to_utf8_bytes(
21 data: &mut [u8],
22 rule: &RuleRef,
23 regex: &Regex,
24 replace_behavior: &ReplaceBehavior,
25) -> SmallVec<[(usize, usize); 1]> {
26 let mut matches = SmallVec::<[(usize, usize); 1]>::new();
27
28 let regex = match BytesRegexBuilder::new(regex.as_str())
29 .unicode(false)
31 .multi_line(false)
32 .dot_matches_new_line(true)
33 .build()
34 {
35 Ok(x) => x,
36 Err(e) => {
37 relay_log::error!(
44 error = &e as &dyn std::error::Error,
45 pattern = regex.as_str(),
46 "Regex failed to compile in non-unicode mode",
47 );
48 return matches;
49 }
50 };
51
52 for captures in regex.captures_iter(data) {
53 for (idx, group) in captures.iter().enumerate() {
54 if let Some(group) = group {
55 if group.start() == group.end() {
56 continue;
57 }
58
59 match replace_behavior {
60 ReplaceBehavior::Groups(ref replace_groups) => {
61 if replace_groups.contains(&(idx as u8)) {
62 matches.push((group.start(), group.end()));
63 }
64 }
65 ReplaceBehavior::Value => {
66 matches.push((0, data.len()));
67 break;
68 }
69 }
70 }
71 }
72 }
73
74 for (start, end) in matches.iter() {
75 data[*start..*end].apply_redaction(&rule.redaction);
76 }
77 matches
78}
79
80fn apply_regex_to_utf16le_bytes(
81 data: &mut [u8],
82 rule: &RuleRef,
83 regex: &Regex,
84 replace_behavior: &ReplaceBehavior,
85) -> bool {
86 let mut changed = false;
87 for segment in WStrSegmentIter::new(data) {
88 match replace_behavior {
89 ReplaceBehavior::Value => {
90 for re_match in regex.find_iter(&segment.decoded) {
91 changed = true;
92 let match_wstr = get_wstr_match(&segment.decoded, re_match, segment.encoded);
93 match_wstr.apply_redaction(&rule.redaction);
94 }
95 }
96 ReplaceBehavior::Groups(ref replace_groups) => {
97 for captures in regex.captures_iter(&segment.decoded) {
98 for group_idx in replace_groups.iter() {
99 if let Some(re_match) = captures.get(*group_idx as usize) {
100 changed = true;
101 let match_wstr =
102 get_wstr_match(&segment.decoded, re_match, segment.encoded);
103 match_wstr.apply_redaction(&rule.redaction);
104 }
105 }
106 }
107 }
108 }
109 }
110 changed
111}
112
113fn get_wstr_match<'a>(
115 all_text: &str,
116 re_match: Match,
117 all_encoded: &'a mut WStr<LittleEndian>,
118) -> &'a mut WStr<LittleEndian> {
119 let mut encoded_start = 0;
120 let mut encoded_end = all_encoded.len();
121
122 let offsets_iter = all_text.char_indices().zip(all_encoded.char_indices());
123 for ((text_offset, _text_char), (encoded_offset, _encoded_char)) in offsets_iter {
124 if text_offset == re_match.start() {
125 encoded_start = encoded_offset;
126 }
127 if text_offset == re_match.end() {
128 encoded_end = encoded_offset;
129 break;
130 }
131 }
132 &mut all_encoded[encoded_start..encoded_end]
133}
134
135trait StringMods: AsRef<[u8]> {
137 fn fill_content(&mut self, fill_char: char);
144
145 fn swap_content(&mut self, replacement: &str, padding: char);
160
161 fn apply_redaction(&mut self, redaction: &Redaction) {
163 const PADDING: char = '*';
164 const MASK: char = '*';
165
166 match redaction {
167 Redaction::Default | Redaction::Remove => {
168 self.fill_content(PADDING);
169 }
170 Redaction::Mask => {
171 self.fill_content(MASK);
172 }
173 Redaction::Hash => {
174 let hashed = utils::hash_value(self.as_ref());
175 self.swap_content(&hashed, PADDING);
176 }
177 Redaction::Replace(ref replace) => {
178 self.swap_content(replace.text.as_str(), PADDING);
179 }
180 Redaction::Other => relay_log::warn!("Incoming redaction is not supported"),
181 }
182 }
183}
184
185impl StringMods for WStr<LittleEndian> {
186 fn fill_content(&mut self, fill_char: char) {
187 let mut buf = [0u16; 1];
190 let fill_u16 = fill_char.encode_utf16(&mut buf[..]);
191 let fill_buf = fill_u16[0].to_le_bytes();
192
193 unsafe {
194 let chunks = self
195 .as_bytes_mut()
196 .chunks_exact_mut(std::mem::size_of::<u16>());
197 for chunk in chunks {
198 chunk.copy_from_slice(&fill_buf);
199 }
200 }
201 }
202
203 fn swap_content(&mut self, replacement: &str, padding: char) {
204 let len = self.len();
207
208 let mut buf = [0u16; 1];
209 padding.encode_utf16(&mut buf[..]);
210 let fill_buf = buf[0].to_le_bytes();
211
212 let mut offset = 0;
213 for code in replacement.encode_utf16() {
214 let char_len = if 0xD800 & code == 0xD800 {
215 std::mem::size_of::<u16>() * 2 } else {
217 std::mem::size_of::<u16>()
218 };
219 if (len - offset) < char_len {
220 break; }
222 unsafe {
223 let target = &mut self.as_bytes_mut()[offset..offset + std::mem::size_of::<u16>()];
224 target.copy_from_slice(&code.to_le_bytes());
225 }
226 offset += std::mem::size_of::<u16>();
227 }
228
229 unsafe {
230 let remainder_bytes = &mut self.as_bytes_mut()[offset..];
231 let chunks = remainder_bytes.chunks_exact_mut(std::mem::size_of::<u16>());
232 for chunk in chunks {
233 chunk.copy_from_slice(&fill_buf);
234 }
235 }
236 }
237}
238
239impl StringMods for [u8] {
240 fn fill_content(&mut self, fill_char: char) {
241 let mut buf = [0u8; 1];
244 fill_char.encode_utf8(&mut buf[..]);
245 for byte in self {
246 *byte = buf[0];
247 }
248 }
249
250 fn swap_content(&mut self, replacement: &str, padding: char) {
251 let mut buf = [0u8; 1];
254 padding.encode_utf8(&mut buf[..]);
255
256 let cutoff = replacement.len().min(self.len());
257 let (left, right) = self.split_at_mut(cutoff);
258 left.copy_from_slice(&replacement.as_bytes()[..cutoff]);
259
260 for byte in right {
261 *byte = buf[0];
262 }
263 }
264}
265
/// Iterator over the valid UTF-16LE segments found in a mutable byte buffer.
struct WStrSegmentIter<'a> {
    /// The buffer being scanned.
    data: &'a mut [u8],
    /// Byte offset into `data` at which the next scan starts.
    offset: usize,
}
275
276impl<'a> WStrSegmentIter<'a> {
277 fn new(data: &'a mut [u8]) -> Self {
278 Self { data, offset: 0 }
279 }
280}
281
impl<'a> Iterator for WStrSegmentIter<'a> {
    type Item = WStrSegment<'a>;

    /// Returns the next valid UTF-16LE segment with at least `MIN_STRING_LEN` characters.
    ///
    /// Scanning resumes at `self.offset`. Invalid input and segments that are too short are
    /// skipped. `None` is returned once the offset has reached the end of the buffer.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if self.offset >= self.data.len() {
                return None;
            }

            let slice = match WStr::from_utf16le_mut(&mut self.data[self.offset..]) {
                // The whole remainder decoded successfully: consume all of it.
                Ok(wstr) => {
                    self.offset += wstr.len();
                    // NOTE(review): nothing is written through this slice here; it is only
                    // re-wrapped as a `WStr` below.
                    unsafe { wstr.as_bytes_mut() }
                }
                // Only a prefix decoded: keep that prefix as the candidate segment and move
                // the offset past the invalid bytes that follow it.
                Err(err) => {
                    let start = self.offset;
                    let end = start + err.valid_up_to();
                    match err.error_len() {
                        Some(len) => self.offset += err.valid_up_to() + len,
                        // No error length reported: nothing after this segment is scanned.
                        None => self.offset = self.data.len(),
                    }
                    &mut self.data[start..end]
                }
            };

            // The slice above borrows from `&mut self`, but the item must carry the lifetime
            // `'a`. It is rebuilt from its raw parts to detach it from the `self` borrow.
            // `self.offset` has already been moved to or past the end of this slice, so
            // later calls do not hand out overlapping slices.
            let ptr = slice.as_mut_ptr();
            let len = slice.len();
            // NOTE(review): presumably sound because the bytes were either validated by
            // `from_utf16le_mut` or are the prefix it reported as valid -- confirm against
            // the `utf16string` documentation.
            let encoded = unsafe {
                WStr::from_utf16le_unchecked_mut(std::slice::from_raw_parts_mut(ptr, len))
            };

            // Segments with fewer than `MIN_STRING_LEN` characters are skipped.
            if encoded.chars().take(MIN_STRING_LEN).count() < MIN_STRING_LEN {
                continue;
            }
            let decoded = encoded.to_utf8();
            return Some(WStrSegment { encoded, decoded });
        }
    }
}
325
// `next` returns `None` whenever `offset >= data.len()` and the offset never decreases, so the
// iterator keeps returning `None` once exhausted.
impl FusedIterator for WStrSegmentIter<'_> {}
327
/// A valid UTF-16LE segment yielded by `WStrSegmentIter`.
struct WStrSegment<'a> {
    /// The segment as mutable UTF-16LE data inside the scanned buffer.
    encoded: &'a mut WStr<LittleEndian>,
    /// The same segment decoded to UTF-8.
    decoded: String,
}
342
/// Applies the rules of a compiled PII config to binary attachments, file paths and JSON
/// payloads.
pub struct PiiAttachmentsProcessor<'a> {
    /// The compiled rules and the selectors they apply to.
    compiled_config: &'a CompiledPiiConfig,
    /// Root processing state (value type `Attachments`) under which per-file states are
    /// entered.
    root_state: ProcessingState<'static>,
}
348
/// The text encodings that `PiiAttachmentsProcessor::scrub_bytes` looks for in a buffer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ScrubEncodings {
    /// Match the rules against the raw bytes only.
    Utf8,

    /// Match the rules against embedded UTF-16LE text only.
    Utf16Le,

    /// Match against the raw bytes first, then against UTF-16LE text in the ranges that were
    /// not redacted by the first pass.
    All,
}
360
361impl<'a> PiiAttachmentsProcessor<'a> {
362 pub fn new(compiled_config: &'a CompiledPiiConfig) -> Self {
364 let root_state =
368 ProcessingState::root().enter_static("", None, Some(ValueType::Attachments));
369
370 PiiAttachmentsProcessor {
371 compiled_config,
372 root_state,
373 }
374 }
375
376 pub(crate) fn state<'s>(
378 &'s self,
379 filename: &'s str,
380 value_type: ValueType,
381 ) -> ProcessingState<'s> {
382 self.root_state.enter_borrowed(
383 filename,
384 Some(Cow::Owned(FieldAttrs::new().pii(Pii::True))),
385 Some(value_type),
386 )
387 }
388
389 pub(crate) fn scrub_bytes(
393 &self,
394 data: &mut [u8],
395 state: &ProcessingState<'_>,
396 encodings: ScrubEncodings,
397 ) -> bool {
398 let pii = state.attrs().pii;
399 if pii == Pii::False {
400 return false;
401 }
402
403 let mut changed = false;
404
405 for (selector, rules) in &self.compiled_config.applications {
406 if selector.matches_path(&state.path()) {
407 for rule in rules {
408 for (_pattern_type, regex, replace_behavior) in
418 get_regex_for_rule_type(&rule.ty)
419 {
420 match encodings {
421 ScrubEncodings::Utf8 => {
422 let matches =
423 apply_regex_to_utf8_bytes(data, rule, regex, &replace_behavior);
424 changed |= !(matches.is_empty());
425 }
426 ScrubEncodings::Utf16Le => {
427 changed |= apply_regex_to_utf16le_bytes(
428 data,
429 rule,
430 regex,
431 &replace_behavior,
432 );
433 }
434 ScrubEncodings::All => {
435 let matches =
436 apply_regex_to_utf8_bytes(data, rule, regex, &replace_behavior);
437 changed |= !(matches.is_empty());
438
439 let unscrubbed_ranges = matches
442 .into_iter()
443 .chain(std::iter::once((data.len(), 0)))
444 .scan((0usize, 0usize), |previous, current| {
445 let start = if previous.1 % 2 == 0 {
446 previous.1
447 } else {
448 previous.1 + 1
449 };
450 let item = (start, current.0);
451 *previous = current;
452 Some(item)
453 })
454 .filter(|(start, end)| end > start);
455 for (start, end) in unscrubbed_ranges {
456 changed |= apply_regex_to_utf16le_bytes(
457 &mut data[start..end],
458 rule,
459 regex,
460 &replace_behavior,
461 );
462 }
463 }
464 }
465 }
466 }
467 }
468 }
469
470 changed
471 }
472
473 pub fn scrub_attachment(&self, filename: &str, data: &mut [u8]) -> bool {
477 let state = self.state(filename, ValueType::Binary);
478 self.scrub_bytes(data, &state, ScrubEncodings::All)
479 }
480
481 pub fn scrub_utf8_filepath(&self, path: &mut str, state: &ProcessingState<'_>) -> bool {
483 if let Some(index) = path.rfind(['/', '\\']) {
484 let data = unsafe { &mut path.as_bytes_mut()[..index] };
485 self.scrub_bytes(data, state, ScrubEncodings::Utf8)
486 } else {
487 false
488 }
489 }
490
491 pub fn scrub_utf16_filepath(
493 &self,
494 path: &mut WStr<LittleEndian>,
495 state: &ProcessingState<'_>,
496 ) -> bool {
497 let index =
498 path.char_indices().rev().find_map(
499 |(i, c)| {
500 if c == '/' || c == '\\' {
501 Some(i)
502 } else {
503 None
504 }
505 },
506 );
507
508 if let Some(index) = index {
509 let data = unsafe { &mut path.as_bytes_mut()[..index] };
510 self.scrub_bytes(data, state, ScrubEncodings::Utf16Le)
511 } else {
512 false
513 }
514 }
515
516 pub fn scrub_json(&self, payload: &[u8]) -> Result<Vec<u8>, JsonScrubError> {
524 let output = Vec::new();
525
526 let visitor = JsonScrubVisitor::new(self.compiled_config);
527
528 let mut deserializer_inner = serde_json::Deserializer::from_slice(payload);
529 let deserializer = transform::Deserializer::new(&mut deserializer_inner, visitor);
530
531 let mut serializer = serde_json::Serializer::new(output);
532 serde_transcode::transcode(deserializer, &mut serializer)
533 .map_err(|_| JsonScrubError::TranscodeFailed)?;
534 Ok(serializer.into_inner())
535 }
536}
537
538#[cfg(test)]
539mod tests {
540 use itertools::Itertools;
541
542 use super::*;
543 use crate::PiiConfig;
544
    /// A data-driven test case for `PiiAttachmentsProcessor::scrub_bytes`.
    enum AttachmentBytesTestCase<'a> {
        /// Applies the rule named `rule` to `selector`.
        Builtin {
            /// Selector the rule is applied to.
            selector: &'a str,
            /// Name of the rule, listed directly under `applications`.
            rule: &'a str,
            /// File name used to enter the processing state.
            filename: &'a str,
            /// Value type used to enter the processing state.
            value_type: ValueType,
            /// Bytes handed to the scrubber.
            input: &'a [u8],
            /// Bytes expected after scrubbing.
            output: &'a [u8],
            /// Expected return value of `scrub_bytes`.
            changed: bool,
        },
        /// Applies a custom `pattern` rule with the `remove` redaction to `selector`.
        Regex {
            /// Selector the custom rule is applied to.
            selector: &'a str,
            /// Pattern of the custom rule.
            regex: &'a str,
            /// File name used to enter the processing state.
            filename: &'a str,
            /// Value type used to enter the processing state.
            value_type: ValueType,
            /// Bytes handed to the scrubber.
            input: &'a [u8],
            /// Bytes expected after scrubbing.
            output: &'a [u8],
            /// Expected return value of `scrub_bytes`.
            changed: bool,
        },
    }
565
    impl AttachmentBytesTestCase<'_> {
        /// Builds the PII config for this case, scrubs a copy of the input with
        /// `ScrubEncodings::All` and asserts both the resulting bytes and the return value.
        fn run(self) {
            let (config, filename, value_type, input, expected, changed) = match self {
                AttachmentBytesTestCase::Builtin {
                    selector,
                    rule,
                    filename,
                    value_type,
                    input,
                    output,
                    changed,
                } => {
                    // The rule is referenced by name only; no custom rules are defined.
                    let config = serde_json::from_value::<PiiConfig>(serde_json::json!(
                        {
                            "applications": {
                                selector: [rule]
                            }
                        }
                    ))
                    .unwrap();
                    (config, filename, value_type, input, output, changed)
                }
                AttachmentBytesTestCase::Regex {
                    selector,
                    regex,
                    filename,
                    value_type,
                    input,
                    output,
                    changed,
                } => {
                    // A single custom pattern rule that removes whatever it matches.
                    let config = serde_json::from_value::<PiiConfig>(serde_json::json!(
                        {
                            "rules": {
                                "custom": {
                                    "type": "pattern",
                                    "pattern": regex,
                                    "redaction": {
                                        "method": "remove"
                                    }
                                }
                            },
                            "applications": {
                                selector: ["custom"]
                            }
                        }
                    ))
                    .unwrap();
                    (config, filename, value_type, input, output, changed)
                }
            };

            let mut actual = input.to_owned();
            let processor = PiiAttachmentsProcessor::new(config.compiled());
            let state = processor.state(filename, value_type);
            let has_changed = processor.scrub_bytes(&mut actual, &state, ScrubEncodings::All);

            // Hex dumps of both buffers are printed on mismatch.
            assert!(
                actual == expected,
                "`actual == expected` in line {}:\n{}\n{}",
                line!(),
                pretty_hex::pretty_hex(&actual),
                pretty_hex::pretty_hex(&expected),
            );

            assert_eq!(changed, has_changed);
        }
    }
634
635 fn utf16le(s: &str) -> Vec<u8> {
636 s.encode_utf16()
637 .map(|u| u.to_le_bytes())
638 .collect::<Vec<[u8; 2]>>()
639 .iter()
640 .flatten()
641 .copied()
642 .collect()
643 }
644
    /// `@ip` writes `[ip]` over the address and pads the rest of the match with `*`.
    #[test]
    fn test_ip_replace_padding() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@ip",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: b"before 127.0.0.1 after",
            output: b"before [ip]***** after",
            changed: true,
        }
        .run();
    }
658
    /// Same as `test_ip_replace_padding`, with the text encoded as UTF-16LE.
    #[test]
    fn test_ip_replace_padding_utf16() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@ip",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: utf16le("before 127.0.0.1 after").as_slice(),
            output: utf16le("before [ip]***** after").as_slice(),
            changed: true,
        }
        .run();
    }
672
    /// `@ip:hash` writes the hash over the address, cut off at the length of the match.
    #[test]
    fn test_ip_hash_trunchating() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@ip:hash",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: b"before 127.0.0.1 after",
            output: b"before AE12FE3B5 after",
            changed: true,
        }
        .run();
    }
686
    /// `@ip:hash` on UTF-16LE text. The expected digest differs from the UTF-8 case because
    /// the hashed bytes are the UTF-16LE encoded ones.
    #[test]
    fn test_ip_hash_trunchating_utf16() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@ip:hash",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: utf16le("before 127.0.0.1 after").as_slice(),
            output: utf16le("before 3FA8F5A46 after").as_slice(),
            changed: true,
        }
        .run();
    }
700
    /// `@ip:mask` overwrites every byte of the address with `*`.
    #[test]
    fn test_ip_masking() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@ip:mask",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: b"before 127.0.0.1 after",
            output: b"before ********* after",
            changed: true,
        }
        .run();
    }
714
    /// Same as `test_ip_masking`, with the text encoded as UTF-16LE.
    #[test]
    fn test_ip_masking_utf16() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@ip:mask",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: utf16le("before 127.0.0.1 after").as_slice(),
            output: utf16le("before ********* after").as_slice(),
            changed: true,
        }
        .run();
    }
728
    /// `@ip:remove` keeps the buffer length: the address is overwritten with `*`.
    #[test]
    fn test_ip_removing() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@ip:remove",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: b"before 127.0.0.1 after",
            output: b"before ********* after",
            changed: true,
        }
        .run();
    }
742
    /// Same as `test_ip_removing`, with the text encoded as UTF-16LE.
    #[test]
    fn test_ip_removing_utf16() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@ip:remove",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: utf16le("before 127.0.0.1 after").as_slice(),
            output: utf16le("before ********* after").as_slice(),
            changed: true,
        }
        .run();
    }
756
    /// Selectors that must not match a `Binary` attachment leave the data unchanged.
    #[test]
    fn test_selectors() {
        for wrong_selector in &[
            "$string",
            "$number",
            "$attachments.* && $string",
            "$attachments",
            "** && !$binary",
        ] {
            AttachmentBytesTestCase::Builtin {
                selector: wrong_selector,
                rule: "@ip:mask",
                filename: "foo.txt",
                value_type: ValueType::Binary,
                input: b"before 127.0.0.1 after",
                output: b"before 127.0.0.1 after",
                changed: false,
            }
            .run();
        }
    }
778
    /// `@anything:remove` overwrites a buffer of arbitrary byte values completely.
    #[test]
    fn test_all_the_bytes() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@anything:remove",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            // NOTE(review): `0..255u8` is exclusive, so the byte 0xFF is not part of the
            // input -- confirm whether that is intended.
            input: (0..255u8).collect::<Vec<_>>().as_slice(),
            output: &[b'*'; 255],
            changed: true,
        }
        .run();
    }
792
    /// Custom patterns made of `\xNN` escapes match byte sequences that are not valid UTF-8.
    #[test]
    fn test_bytes_regexes() {
        // Each sample is asserted to be invalid UTF-8 in the loop below.
        let samples: &[&[u8]] = &[
            b"\xc3\x28",
            b"\xa0\xa1",
            b"\xe2\x28\xa1",
            b"\xe2\x82\x28",
            b"\xf0\x28\x8c\xbc",
            b"\xf0\x90\x28\xbc",
            b"\xf0\x28\x8c\x28",
            b"\xf8\xa1\xa1\xa1\xa1",
            b"\xfc\xa1\xa1\xa1\xa1\xa1",
        ];

        for bytes in samples {
            assert!(String::from_utf8(bytes.to_vec()).is_err());

            AttachmentBytesTestCase::Regex {
                selector: "$binary",
                // One `\xNN` escape per input byte.
                regex: &bytes.iter().map(|x| format!("\\x{x:02x}")).join(""),
                filename: "foo.txt",
                value_type: ValueType::Binary,
                input: bytes,
                output: &vec![b'*'; bytes.len()],
                changed: true,
            }
            .run()
        }
    }
825
    /// A buffer that is valid UTF-16LE throughout is yielded as one segment.
    #[test]
    fn test_segments_all_data() {
        let mut data = Vec::from(&b"h\x00e\x00l\x00l\x00o\x00"[..]);
        let mut iter = WStrSegmentIter::new(&mut data[..]);

        let segment = iter.next().unwrap();
        assert_eq!(segment.decoded, "hello");
        assert_eq!(segment.encoded.as_bytes(), b"h\x00e\x00l\x00l\x00o\x00");

        assert!(iter.next().is_none());
    }
837
    /// A segment surrounded by invalid bytes on even offsets is found; the garbage is skipped.
    #[test]
    fn test_segments_middle_2_byte_aligned() {
        let mut data = Vec::from(&b"\xd8\xd8\xd8\xd8h\x00e\x00l\x00l\x00o\x00\xd8\xd8"[..]);
        let mut iter = WStrSegmentIter::new(&mut data[..]);

        let segment = iter.next().unwrap();
        assert_eq!(segment.decoded, "hello");
        assert_eq!(segment.encoded.as_bytes(), b"h\x00e\x00l\x00l\x00o\x00");

        assert!(iter.next().is_none());
    }
849
    /// Writing through a yielded segment changes the underlying buffer and nothing else.
    #[test]
    fn test_segments_middle_2_byte_aligned_mutation() {
        let mut data = Vec::from(&b"\xd8\xd8\xd8\xd8h\x00e\x00l\x00l\x00o\x00\xd8\xd8"[..]);
        let mut iter = WStrSegmentIter::new(&mut data[..]);

        let segment = iter.next().unwrap();
        // Overwrite "hello" with "world", same length.
        unsafe {
            segment
                .encoded
                .as_bytes_mut()
                .copy_from_slice(&b"w\x00o\x00r\x00l\x00d\x00"[..]);
        }

        assert!(iter.next().is_none());

        assert_eq!(data, b"\xd8\xd8\xd8\xd8w\x00o\x00r\x00l\x00d\x00\xd8\xd8");
    }
867
    /// With an odd number of leading garbage bytes the text is decoded shifted by one byte,
    /// which yields different characters than "hello".
    #[test]
    fn test_segments_middle_unaligned() {
        let mut data = Vec::from(&b"\xd8\xd8\xd8h\x00e\x00l\x00l\x00o\x00\xd8\xd8"[..]);
        let mut iter = WStrSegmentIter::new(&mut data);

        let segment = iter.next().unwrap();
        assert_eq!(segment.decoded, "棘攀氀氀漀");

        assert!(iter.next().is_none());
    }
879
    /// A segment that runs to the end of the buffer is found after leading garbage.
    #[test]
    fn test_segments_end_aligned() {
        let mut data = Vec::from(&b"\xd8\xd8h\x00e\x00l\x00l\x00o\x00"[..]);
        let mut iter = WStrSegmentIter::new(&mut data);

        let segment = iter.next().unwrap();
        assert_eq!(segment.decoded, "hello");

        assert!(iter.next().is_none());
    }
890
    /// A buffer holding only invalid data yields no segment.
    #[test]
    fn test_segments_garbage() {
        let mut data = Vec::from(&b"\xd8\xd8"[..]);
        let mut iter = WStrSegmentIter::new(&mut data);

        assert!(iter.next().is_none());
    }
898
    /// The two-character segment "yo" is below `MIN_STRING_LEN` and is skipped.
    #[test]
    fn test_segments_too_short() {
        let mut data = Vec::from(&b"\xd8\xd8y\x00o\x00\xd8\xd8h\x00e\x00l\x00l\x00o\x00"[..]);
        let mut iter = WStrSegmentIter::new(&mut data);

        let segment = iter.next().unwrap();
        assert_eq!(segment.decoded, "hello");

        assert!(iter.next().is_none());
    }
909
    /// Two segments separated by invalid bytes are yielded in order.
    #[test]
    fn test_segments_multiple() {
        let mut data =
            Vec::from(&b"\xd8\xd8h\x00e\x00l\x00l\x00o\x00\xd8\xd8w\x00o\x00r\x00l\x00d\x00"[..]);

        let mut iter = WStrSegmentIter::new(&mut data);

        let segment = iter.next().unwrap();
        assert_eq!(segment.decoded, "hello");

        let segment = iter.next().unwrap();
        assert_eq!(segment.decoded, "world");

        assert!(iter.next().is_none());
    }
925
    /// `fill_content` replaces every UTF-16 code unit with the fill character.
    #[test]
    fn test_fill_content_wstr() {
        let mut b = Vec::from(&b"h\x00e\x00l\x00l\x00o\x00"[..]);
        let s = WStr::from_utf16le_mut(b.as_mut_slice()).unwrap();
        s.fill_content('x');
        assert_eq!(b.as_slice(), b"x\x00x\x00x\x00x\x00x\x00");
    }
933
    /// A fill character that needs a surrogate pair makes `fill_content` panic.
    #[test]
    #[should_panic]
    fn test_fill_content_wstr_panic() {
        let mut b = Vec::from(&b"h\x00e\x00y\x00"[..]);
        let s = WStr::from_utf16le_mut(b.as_mut_slice()).unwrap();
        s.fill_content('\u{10000}');
    }
941
    /// `swap_content` on UTF-16LE data: exact fit, padding, truncation and surrogate pairs.
    #[test]
    fn test_swap_content_wstr() {
        // Replacement has exactly the length of the content.
        let mut b = Vec::from(&b"h\x00e\x00l\x00l\x00o\x00"[..]);
        let s = WStr::from_utf16le_mut(b.as_mut_slice()).unwrap();
        s.swap_content("world", 'x');
        assert_eq!(b.as_slice(), b"w\x00o\x00r\x00l\x00d\x00");

        // Shorter replacement: the rest is padded.
        let mut b = Vec::from(&b"h\x00e\x00l\x00l\x00o\x00"[..]);
        let s = WStr::from_utf16le_mut(b.as_mut_slice()).unwrap();
        s.swap_content("hey", 'x');
        assert_eq!(b.as_slice(), b"h\x00e\x00y\x00x\x00x\x00");

        // Longer replacement: it is cut off.
        let mut b = Vec::from(&b"h\x00e\x00y\x00"[..]);
        let s = WStr::from_utf16le_mut(b.as_mut_slice()).unwrap();
        s.swap_content("world", 'x');
        assert_eq!(b.as_slice(), b"w\x00o\x00r\x00");

        // A surrogate pair that does not fit is dropped and padded instead.
        let mut b = Vec::from(&b"h\x00e\x00y\x00"[..]);
        let s = WStr::from_utf16le_mut(b.as_mut_slice()).unwrap();
        s.swap_content("yo\u{10000}", 'x');
        assert_eq!(b.as_slice(), b"y\x00o\x00x\x00");
    }
968
    /// A padding character that needs a surrogate pair makes `swap_content` panic.
    #[test]
    #[should_panic]
    fn test_swap_content_wstr_panic() {
        let mut b = Vec::from(&b"h\x00e\x00y\x00"[..]);
        let s = WStr::from_utf16le_mut(b.as_mut_slice()).unwrap();
        s.swap_content("yo", '\u{10000}');
    }
976
    /// `get_wstr_match` maps a match on the decoded text onto the encoded bytes.
    #[test]
    #[allow(clippy::trivial_regex)]
    fn test_get_wstr_match() {
        let s = "hello there";
        let mut b = Vec::from(&b"h\x00e\x00l\x00l\x00o\x00 \x00t\x00h\x00e\x00r\x00e\x00"[..]);
        let w = WStr::from_utf16le_mut(b.as_mut_slice()).unwrap();

        // A match at the start of the text.
        let re = Regex::new("hello").unwrap();
        let re_match = re.find(s).unwrap();
        let m = get_wstr_match(s, re_match, w);
        assert_eq!(m.as_bytes(), b"h\x00e\x00l\x00l\x00o\x00");

        // A match covering the whole text.
        let re = Regex::new(".*").unwrap();
        let re_match = re.find(s).unwrap();
        let m = get_wstr_match(s, re_match, w);
        assert_eq!(
            m.as_bytes(),
            b"h\x00e\x00l\x00l\x00o\x00 \x00t\x00h\x00e\x00r\x00e\x00"
        );
    }
999}