1use regex::bytes::RegexBuilder as BytesRegexBuilder;
2use regex::{Match, Regex};
3use relay_event_schema::processor::{FieldAttrs, Pii, ProcessingState, ValueType};
4use smallvec::SmallVec;
5use std::borrow::Cow;
6use std::iter::FusedIterator;
7use utf16string::{LittleEndian, WStr};
8
9use crate::compiledconfig::RuleRef;
10use crate::regexes::{ReplaceBehavior, get_regex_for_rule_type};
11use crate::{CompiledPiiConfig, JsonScrubError, JsonScrubVisitor, Redaction, transform, utils};
12
/// Minimum number of characters a decoded UTF-16 segment must contain for
/// `WStrSegmentIter` to yield it; shorter segments are skipped.
const MIN_STRING_LEN: usize = 5;
19
20fn apply_regex_to_utf8_bytes(
21 data: &mut [u8],
22 rule: &RuleRef,
23 regex: &Regex,
24 replace_behavior: &ReplaceBehavior,
25) -> SmallVec<[(usize, usize); 1]> {
26 let mut matches = SmallVec::<[(usize, usize); 1]>::new();
27
28 let regex = match BytesRegexBuilder::new(regex.as_str())
29 .unicode(false)
31 .multi_line(false)
32 .dot_matches_new_line(true)
33 .build()
34 {
35 Ok(x) => x,
36 Err(e) => {
37 relay_log::error!(
44 error = &e as &dyn std::error::Error,
45 pattern = regex.as_str(),
46 "Regex failed to compile in non-unicode mode",
47 );
48 return matches;
49 }
50 };
51
52 for captures in regex.captures_iter(data) {
53 for (idx, group) in captures.iter().enumerate() {
54 if let Some(group) = group {
55 if group.start() == group.end() {
56 continue;
57 }
58
59 match replace_behavior {
60 ReplaceBehavior::Groups(replace_groups) => {
61 if replace_groups.contains(&(idx as u8)) {
62 matches.push((group.start(), group.end()));
63 }
64 }
65 ReplaceBehavior::Value => {
66 matches.push((0, data.len()));
67 break;
68 }
69 }
70 }
71 }
72 }
73
74 for (start, end) in matches.iter() {
75 data[*start..*end].apply_redaction(&rule.redaction);
76 }
77 matches
78}
79
80fn apply_regex_to_utf16le_bytes(
81 data: &mut [u8],
82 rule: &RuleRef,
83 regex: &Regex,
84 replace_behavior: &ReplaceBehavior,
85) -> bool {
86 let mut changed = false;
87 for segment in WStrSegmentIter::new(data) {
88 match replace_behavior {
89 ReplaceBehavior::Value => {
90 for re_match in regex.find_iter(&segment.decoded) {
91 changed = true;
92 let match_wstr = get_wstr_match(&segment.decoded, re_match, segment.encoded);
93 match_wstr.apply_redaction(&rule.redaction);
94 }
95 }
96 ReplaceBehavior::Groups(replace_groups) => {
97 for captures in regex.captures_iter(&segment.decoded) {
98 for group_idx in replace_groups.iter() {
99 if let Some(re_match) = captures.get(*group_idx as usize) {
100 changed = true;
101 let match_wstr =
102 get_wstr_match(&segment.decoded, re_match, segment.encoded);
103 match_wstr.apply_redaction(&rule.redaction);
104 }
105 }
106 }
107 }
108 }
109 }
110 changed
111}
112
113fn get_wstr_match<'a>(
115 all_text: &str,
116 re_match: Match,
117 all_encoded: &'a mut WStr<LittleEndian>,
118) -> &'a mut WStr<LittleEndian> {
119 let mut encoded_start = 0;
120 let mut encoded_end = all_encoded.len();
121
122 let offsets_iter = all_text.char_indices().zip(all_encoded.char_indices());
123 for ((text_offset, _text_char), (encoded_offset, _encoded_char)) in offsets_iter {
124 if text_offset == re_match.start() {
125 encoded_start = encoded_offset;
126 }
127 if text_offset == re_match.end() {
128 encoded_end = encoded_offset;
129 break;
130 }
131 }
132 &mut all_encoded[encoded_start..encoded_end]
133}
134
135trait StringMods: AsRef<[u8]> {
137 fn fill_content(&mut self, fill_char: char);
144
145 fn swap_content(&mut self, replacement: &str, padding: char);
160
161 fn apply_redaction(&mut self, redaction: &Redaction) {
163 const PADDING: char = '*';
164 const MASK: char = '*';
165
166 match redaction {
167 Redaction::Default | Redaction::Remove => {
168 self.fill_content(PADDING);
169 }
170 Redaction::Mask => {
171 self.fill_content(MASK);
172 }
173 Redaction::Hash => {
174 let hashed = utils::hash_value(self.as_ref());
175 self.swap_content(&hashed, PADDING);
176 }
177 Redaction::Replace(replace) => {
178 self.swap_content(replace.text.as_str(), PADDING);
179 }
180 Redaction::Other => relay_log::warn!("Incoming redaction is not supported"),
181 }
182 }
183}
184
185impl StringMods for WStr<LittleEndian> {
186 fn fill_content(&mut self, fill_char: char) {
187 let mut buf = [0u16; 1];
190 let fill_u16 = fill_char.encode_utf16(&mut buf[..]);
191 let fill_buf = fill_u16[0].to_le_bytes();
192
193 unsafe {
194 let chunks = self
195 .as_bytes_mut()
196 .chunks_exact_mut(std::mem::size_of::<u16>());
197 for chunk in chunks {
198 chunk.copy_from_slice(&fill_buf);
199 }
200 }
201 }
202
203 fn swap_content(&mut self, replacement: &str, padding: char) {
204 let len = self.len();
207
208 let mut buf = [0u16; 1];
209 padding.encode_utf16(&mut buf[..]);
210 let fill_buf = buf[0].to_le_bytes();
211
212 let mut offset = 0;
213 for code in replacement.encode_utf16() {
214 let char_len = if 0xD800 & code == 0xD800 {
215 std::mem::size_of::<u16>() * 2 } else {
217 std::mem::size_of::<u16>()
218 };
219 if (len - offset) < char_len {
220 break; }
222 unsafe {
223 let target = &mut self.as_bytes_mut()[offset..offset + std::mem::size_of::<u16>()];
224 target.copy_from_slice(&code.to_le_bytes());
225 }
226 offset += std::mem::size_of::<u16>();
227 }
228
229 unsafe {
230 let remainder_bytes = &mut self.as_bytes_mut()[offset..];
231 let chunks = remainder_bytes.chunks_exact_mut(std::mem::size_of::<u16>());
232 for chunk in chunks {
233 chunk.copy_from_slice(&fill_buf);
234 }
235 }
236 }
237}
238
239impl StringMods for [u8] {
240 fn fill_content(&mut self, fill_char: char) {
241 let mut buf = [0u8; 1];
244 fill_char.encode_utf8(&mut buf[..]);
245 for byte in self {
246 *byte = buf[0];
247 }
248 }
249
250 fn swap_content(&mut self, replacement: &str, padding: char) {
251 let mut buf = [0u8; 1];
254 padding.encode_utf8(&mut buf[..]);
255
256 let cutoff = replacement.len().min(self.len());
257 let (left, right) = self.split_at_mut(cutoff);
258 left.copy_from_slice(&replacement.as_bytes()[..cutoff]);
259
260 for byte in right {
261 *byte = buf[0];
262 }
263 }
264}
265
/// Iterator over the UTF-16LE text segments contained in a byte buffer.
///
/// See the `Iterator` implementation for how segments are located.
struct WStrSegmentIter<'a> {
    /// The complete buffer being scanned.
    data: &'a mut [u8],
    /// Byte offset into `data` at which the next scan starts.
    offset: usize,
}
275
276impl<'a> WStrSegmentIter<'a> {
277 fn new(data: &'a mut [u8]) -> Self {
278 Self { data, offset: 0 }
279 }
280}
281
impl<'a> Iterator for WStrSegmentIter<'a> {
    type Item = WStrSegment<'a>;

    /// Returns the next UTF-16LE segment with at least `MIN_STRING_LEN` characters.
    ///
    /// Starting at the current offset, the rest of the buffer is validated as UTF-16LE. The
    /// valid prefix becomes the candidate segment and the offset is moved past it (and past the
    /// invalid bytes, if any). Candidates that are too short are skipped. Returns `None` once
    /// the offset has reached the end of the buffer.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if self.offset >= self.data.len() {
                return None;
            }

            let slice = match WStr::from_utf16le_mut(&mut self.data[self.offset..]) {
                Ok(wstr) => {
                    // The entire remainder is valid: consume all of it.
                    self.offset += wstr.len();
                    unsafe { wstr.as_bytes_mut() }
                }
                Err(err) => {
                    // Only the bytes up to the error are valid; they form the candidate.
                    let start = self.offset;
                    let end = start + err.valid_up_to();
                    match err.error_len() {
                        // Resume scanning right after the invalid bytes.
                        Some(len) => self.offset += err.valid_up_to() + len,
                        // No error length reported: stop scanning after this candidate.
                        None => self.offset = self.data.len(),
                    }
                    &mut self.data[start..end]
                }
            };

            // Rebuild the slice from a raw pointer to detach it from the borrow of `self`, so
            // that the returned segment can carry the lifetime `'a` of the underlying buffer.
            // NOTE(review): this relies on later calls only touching bytes at or after
            // `self.offset`, so yielded segments never overlap; worth confirming under Miri,
            // since `self.data` is re-indexed as a whole on the next call.
            let ptr = slice.as_mut_ptr();
            let len = slice.len();
            let encoded = unsafe {
                WStr::from_utf16le_unchecked_mut(std::slice::from_raw_parts_mut(ptr, len))
            };

            // Count at most `MIN_STRING_LEN` characters to decide whether the segment is long
            // enough, without walking the entire segment.
            if encoded.chars().take(MIN_STRING_LEN).count() < MIN_STRING_LEN {
                continue;
            }
            let decoded = encoded.to_utf8();
            return Some(WStrSegment { encoded, decoded });
        }
    }
}
325
// Once the offset has reached the end of the data, `next` keeps returning `None`.
impl FusedIterator for WStrSegmentIter<'_> {}
327
/// A UTF-16LE text segment yielded by `WStrSegmentIter`.
struct WStrSegment<'a> {
    /// The segment as found in the underlying buffer; writes to it modify that buffer.
    encoded: &'a mut WStr<LittleEndian>,
    /// UTF-8 copy of `encoded`, produced when the segment was created.
    decoded: String,
}
342
/// Scrubs PII from attachment data according to a compiled PII configuration.
pub struct PiiAttachmentsProcessor<'a> {
    /// The configuration whose rule applications are evaluated.
    compiled_config: &'a CompiledPiiConfig,
    /// Root state of value type `Attachments`, from which per-attachment states are derived.
    root_state: ProcessingState<'static>,
}
348
/// Selects which text encodings `PiiAttachmentsProcessor::scrub_bytes` looks for.
pub enum ScrubEncodings {
    /// Match the rules against the raw bytes only.
    Utf8,

    /// Match the rules against UTF-16LE text segments only.
    Utf16Le,

    /// Match against the raw bytes first, then against UTF-16LE text in the remaining ranges.
    All,
}
360
361impl<'a> PiiAttachmentsProcessor<'a> {
362 pub fn new(compiled_config: &'a CompiledPiiConfig) -> Self {
364 let root_state =
368 ProcessingState::root().enter_static("", None, Some(ValueType::Attachments));
369
370 PiiAttachmentsProcessor {
371 compiled_config,
372 root_state,
373 }
374 }
375
376 pub(crate) fn state<'s>(
378 &'s self,
379 filename: &'s str,
380 value_type: ValueType,
381 ) -> ProcessingState<'s> {
382 self.root_state.enter_borrowed(
383 filename,
384 Some(Cow::Owned(FieldAttrs::new().pii(Pii::True))),
385 Some(value_type),
386 )
387 }
388
389 pub(crate) fn scrub_bytes(
393 &self,
394 data: &mut [u8],
395 state: &ProcessingState<'_>,
396 encodings: ScrubEncodings,
397 ) -> bool {
398 let pii = state.attrs().pii;
399 if pii == Pii::False {
400 return false;
401 }
402
403 let mut changed = false;
404
405 for (selector, rules) in &self.compiled_config.applications {
406 if selector.matches_path(&state.path()) {
407 for rule in rules {
408 for (_pattern_type, regex, replace_behavior) in
418 get_regex_for_rule_type(&rule.ty)
419 {
420 match encodings {
421 ScrubEncodings::Utf8 => {
422 let matches =
423 apply_regex_to_utf8_bytes(data, rule, regex, &replace_behavior);
424 changed |= !(matches.is_empty());
425 }
426 ScrubEncodings::Utf16Le => {
427 changed |= apply_regex_to_utf16le_bytes(
428 data,
429 rule,
430 regex,
431 &replace_behavior,
432 );
433 }
434 ScrubEncodings::All => {
435 let matches =
436 apply_regex_to_utf8_bytes(data, rule, regex, &replace_behavior);
437 changed |= !(matches.is_empty());
438
439 let unscrubbed_ranges = matches
442 .into_iter()
443 .chain(std::iter::once((data.len(), 0)))
444 .scan((0usize, 0usize), |previous, current| {
445 let start = if previous.1 % 2 == 0 {
446 previous.1
447 } else {
448 previous.1 + 1
449 };
450 let item = (start, current.0);
451 *previous = current;
452 Some(item)
453 })
454 .filter(|(start, end)| end > start);
455 for (start, end) in unscrubbed_ranges {
456 changed |= apply_regex_to_utf16le_bytes(
457 &mut data[start..end],
458 rule,
459 regex,
460 &replace_behavior,
461 );
462 }
463 }
464 }
465 }
466 }
467 }
468 }
469
470 changed
471 }
472
473 pub fn scrub_attachment(&self, filename: &str, data: &mut [u8]) -> bool {
477 let state = self.state(filename, ValueType::Binary);
478 self.scrub_bytes(data, &state, ScrubEncodings::All)
479 }
480
481 pub fn scrub_utf8_filepath(&self, path: &mut str, state: &ProcessingState<'_>) -> bool {
483 if let Some(index) = path.rfind(['/', '\\']) {
484 let data = unsafe { &mut path.as_bytes_mut()[..index] };
485 self.scrub_bytes(data, state, ScrubEncodings::Utf8)
486 } else {
487 false
488 }
489 }
490
491 pub fn scrub_utf16_filepath(
493 &self,
494 path: &mut WStr<LittleEndian>,
495 state: &ProcessingState<'_>,
496 ) -> bool {
497 let index = path
498 .char_indices()
499 .rev()
500 .find_map(|(i, c)| if c == '/' || c == '\\' { Some(i) } else { None });
501
502 if let Some(index) = index {
503 let data = unsafe { &mut path.as_bytes_mut()[..index] };
504 self.scrub_bytes(data, state, ScrubEncodings::Utf16Le)
505 } else {
506 false
507 }
508 }
509
510 pub fn scrub_json(&self, payload: &[u8]) -> Result<Vec<u8>, JsonScrubError> {
518 let output = Vec::new();
519
520 let visitor = JsonScrubVisitor::new(self.compiled_config);
521
522 let mut deserializer_inner = serde_json::Deserializer::from_slice(payload);
523 let deserializer = transform::Deserializer::new(&mut deserializer_inner, visitor);
524
525 let mut serializer = serde_json::Serializer::new(output);
526 serde_transcode::transcode(deserializer, &mut serializer)
527 .map_err(|_| JsonScrubError::TranscodeFailed)?;
528 Ok(serializer.into_inner())
529 }
530}
531
#[cfg(test)]
mod tests {
    use itertools::Itertools;

    use super::*;
    use crate::PiiConfig;

    /// A scrubbing scenario: a PII config applied to `input`, with the expected result.
    enum AttachmentBytesTestCase<'a> {
        /// Applies the builtin rule `rule` to everything matching `selector`.
        Builtin {
            selector: &'a str,
            rule: &'a str,
            filename: &'a str,
            value_type: ValueType,
            input: &'a [u8],
            output: &'a [u8],
            changed: bool,
        },
        /// Applies a custom pattern rule built from `regex`, using the `remove` redaction.
        Regex {
            selector: &'a str,
            regex: &'a str,
            filename: &'a str,
            value_type: ValueType,
            input: &'a [u8],
            output: &'a [u8],
            changed: bool,
        },
    }

    impl AttachmentBytesTestCase<'_> {
        /// Builds the config, scrubs a copy of the input with all encodings enabled, and checks
        /// both the resulting bytes and the reported `changed` flag.
        fn run(self) {
            let (config, filename, value_type, input, expected, changed) = match self {
                AttachmentBytesTestCase::Builtin {
                    selector,
                    rule,
                    filename,
                    value_type,
                    input,
                    output,
                    changed,
                } => {
                    let config = serde_json::from_value::<PiiConfig>(serde_json::json!(
                        {
                            "applications": {
                                selector: [rule]
                            }
                        }
                    ))
                    .unwrap();
                    (config, filename, value_type, input, output, changed)
                }
                AttachmentBytesTestCase::Regex {
                    selector,
                    regex,
                    filename,
                    value_type,
                    input,
                    output,
                    changed,
                } => {
                    let config = serde_json::from_value::<PiiConfig>(serde_json::json!(
                        {
                            "rules": {
                                "custom": {
                                    "type": "pattern",
                                    "pattern": regex,
                                    "redaction": {
                                        "method": "remove"
                                    }
                                }
                            },
                            "applications": {
                                selector: ["custom"]
                            }
                        }
                    ))
                    .unwrap();
                    (config, filename, value_type, input, output, changed)
                }
            };

            let mut actual = input.to_owned();
            let processor = PiiAttachmentsProcessor::new(config.compiled());
            let state = processor.state(filename, value_type);
            let has_changed = processor.scrub_bytes(&mut actual, &state, ScrubEncodings::All);

            // Compared manually so that mismatches are reported as hex dumps.
            assert!(
                actual == expected,
                "`actual == expected` in line {}:\n{}\n{}",
                line!(),
                pretty_hex::pretty_hex(&actual),
                pretty_hex::pretty_hex(&expected),
            );

            assert_eq!(changed, has_changed);
        }
    }

    /// Encodes `s` as UTF-16LE bytes.
    fn utf16le(s: &str) -> Vec<u8> {
        s.encode_utf16().flat_map(u16::to_le_bytes).collect()
    }

    #[test]
    fn test_ip_replace_padding() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@ip",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: b"before 127.0.0.1 after",
            output: b"before [ip]***** after",
            changed: true,
        }
        .run();
    }

    #[test]
    fn test_ip_replace_padding_utf16() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@ip",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: utf16le("before 127.0.0.1 after").as_slice(),
            output: utf16le("before [ip]***** after").as_slice(),
            changed: true,
        }
        .run();
    }

    #[test]
    fn test_ip_hash_trunchating() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@ip:hash",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: b"before 127.0.0.1 after",
            output: b"before AE12FE3B5 after",
            changed: true,
        }
        .run();
    }

    #[test]
    fn test_ip_hash_trunchating_utf16() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@ip:hash",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: utf16le("before 127.0.0.1 after").as_slice(),
            output: utf16le("before 3FA8F5A46 after").as_slice(),
            changed: true,
        }
        .run();
    }

    #[test]
    fn test_ip_masking() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@ip:mask",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: b"before 127.0.0.1 after",
            output: b"before ********* after",
            changed: true,
        }
        .run();
    }

    #[test]
    fn test_ip_masking_utf16() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@ip:mask",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: utf16le("before 127.0.0.1 after").as_slice(),
            output: utf16le("before ********* after").as_slice(),
            changed: true,
        }
        .run();
    }

    #[test]
    fn test_ip_removing() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@ip:remove",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: b"before 127.0.0.1 after",
            output: b"before ********* after",
            changed: true,
        }
        .run();
    }

    #[test]
    fn test_ip_removing_utf16() {
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@ip:remove",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: utf16le("before 127.0.0.1 after").as_slice(),
            output: utf16le("before ********* after").as_slice(),
            changed: true,
        }
        .run();
    }

    #[test]
    fn test_selectors() {
        // None of these selectors should apply to a binary attachment.
        for wrong_selector in &[
            "$string",
            "$number",
            "$attachments.* && $string",
            "$attachments",
            "** && !$binary",
        ] {
            AttachmentBytesTestCase::Builtin {
                selector: wrong_selector,
                rule: "@ip:mask",
                filename: "foo.txt",
                value_type: ValueType::Binary,
                input: b"before 127.0.0.1 after",
                output: b"before 127.0.0.1 after",
                changed: false,
            }
            .run();
        }
    }

    #[test]
    fn test_all_the_bytes() {
        // `0..255u8` is an exclusive range: the input holds the 255 bytes 0x00 through 0xFE.
        AttachmentBytesTestCase::Builtin {
            selector: "$binary",
            rule: "@anything:remove",
            filename: "foo.txt",
            value_type: ValueType::Binary,
            input: (0..255u8).collect::<Vec<_>>().as_slice(),
            output: &[b'*'; 255],
            changed: true,
        }
        .run();
    }

    #[test]
    fn test_bytes_regexes() {
        // Byte sequences that are not valid UTF-8 (asserted below).
        let samples: &[&[u8]] = &[
            b"\xc3\x28",
            b"\xa0\xa1",
            b"\xe2\x28\xa1",
            b"\xe2\x82\x28",
            b"\xf0\x28\x8c\xbc",
            b"\xf0\x90\x28\xbc",
            b"\xf0\x28\x8c\x28",
            b"\xf8\xa1\xa1\xa1\xa1",
            b"\xfc\xa1\xa1\xa1\xa1\xa1",
        ];

        for bytes in samples {
            assert!(String::from_utf8(bytes.to_vec()).is_err());

            // The pattern spells out the sample as a sequence of `\xNN` escapes.
            AttachmentBytesTestCase::Regex {
                selector: "$binary",
                regex: &bytes.iter().map(|x| format!("\\x{x:02x}")).join(""),
                filename: "foo.txt",
                value_type: ValueType::Binary,
                input: bytes,
                output: &vec![b'*'; bytes.len()],
                changed: true,
            }
            .run()
        }
    }

    #[test]
    fn test_segments_all_data() {
        let mut data = Vec::from(&b"h\x00e\x00l\x00l\x00o\x00"[..]);
        let mut iter = WStrSegmentIter::new(&mut data[..]);

        let segment = iter.next().unwrap();
        assert_eq!(segment.decoded, "hello");
        assert_eq!(segment.encoded.as_bytes(), b"h\x00e\x00l\x00l\x00o\x00");

        assert!(iter.next().is_none());
    }

    #[test]
    fn test_segments_middle_2_byte_aligned() {
        let mut data = Vec::from(&b"\xd8\xd8\xd8\xd8h\x00e\x00l\x00l\x00o\x00\xd8\xd8"[..]);
        let mut iter = WStrSegmentIter::new(&mut data[..]);

        let segment = iter.next().unwrap();
        assert_eq!(segment.decoded, "hello");
        assert_eq!(segment.encoded.as_bytes(), b"h\x00e\x00l\x00l\x00o\x00");

        assert!(iter.next().is_none());
    }

    #[test]
    fn test_segments_middle_2_byte_aligned_mutation() {
        let mut data = Vec::from(&b"\xd8\xd8\xd8\xd8h\x00e\x00l\x00l\x00o\x00\xd8\xd8"[..]);
        let mut iter = WStrSegmentIter::new(&mut data[..]);

        // Writing through the segment must modify the underlying buffer.
        let segment = iter.next().unwrap();
        unsafe {
            segment
                .encoded
                .as_bytes_mut()
                .copy_from_slice(&b"w\x00o\x00r\x00l\x00d\x00"[..]);
        }

        assert!(iter.next().is_none());

        assert_eq!(data, b"\xd8\xd8\xd8\xd8w\x00o\x00r\x00l\x00d\x00\xd8\xd8");
    }

    #[test]
    fn test_segments_middle_unaligned() {
        let mut data = Vec::from(&b"\xd8\xd8\xd8h\x00e\x00l\x00l\x00o\x00\xd8\xd8"[..]);
        let mut iter = WStrSegmentIter::new(&mut data);

        // The text is off by one byte, so the code units decode to different characters.
        let segment = iter.next().unwrap();
        assert_eq!(segment.decoded, "棘攀氀氀漀");

        assert!(iter.next().is_none());
    }

    #[test]
    fn test_segments_end_aligned() {
        let mut data = Vec::from(&b"\xd8\xd8h\x00e\x00l\x00l\x00o\x00"[..]);
        let mut iter = WStrSegmentIter::new(&mut data);

        let segment = iter.next().unwrap();
        assert_eq!(segment.decoded, "hello");

        assert!(iter.next().is_none());
    }

    #[test]
    fn test_segments_garbage() {
        let mut data = Vec::from(&b"\xd8\xd8"[..]);
        let mut iter = WStrSegmentIter::new(&mut data);

        assert!(iter.next().is_none());
    }

    #[test]
    fn test_segments_too_short() {
        // "yo" is shorter than `MIN_STRING_LEN` and must be skipped.
        let mut data = Vec::from(&b"\xd8\xd8y\x00o\x00\xd8\xd8h\x00e\x00l\x00l\x00o\x00"[..]);
        let mut iter = WStrSegmentIter::new(&mut data);

        let segment = iter.next().unwrap();
        assert_eq!(segment.decoded, "hello");

        assert!(iter.next().is_none());
    }

    #[test]
    fn test_segments_multiple() {
        let mut data =
            Vec::from(&b"\xd8\xd8h\x00e\x00l\x00l\x00o\x00\xd8\xd8w\x00o\x00r\x00l\x00d\x00"[..]);

        let mut iter = WStrSegmentIter::new(&mut data);

        let segment = iter.next().unwrap();
        assert_eq!(segment.decoded, "hello");

        let segment = iter.next().unwrap();
        assert_eq!(segment.decoded, "world");

        assert!(iter.next().is_none());
    }

    #[test]
    fn test_fill_content_wstr() {
        let mut b = Vec::from(&b"h\x00e\x00l\x00l\x00o\x00"[..]);
        let s = WStr::from_utf16le_mut(b.as_mut_slice()).unwrap();
        s.fill_content('x');
        assert_eq!(b.as_slice(), b"x\x00x\x00x\x00x\x00x\x00");
    }

    #[test]
    #[should_panic]
    fn test_fill_content_wstr_panic() {
        // A fill character that needs a surrogate pair is rejected.
        let mut b = Vec::from(&b"h\x00e\x00y\x00"[..]);
        let s = WStr::from_utf16le_mut(b.as_mut_slice()).unwrap();
        s.fill_content('\u{10000}');
    }

    #[test]
    fn test_swap_content_wstr() {
        // Replacement of the same length.
        let mut b = Vec::from(&b"h\x00e\x00l\x00l\x00o\x00"[..]);
        let s = WStr::from_utf16le_mut(b.as_mut_slice()).unwrap();
        s.swap_content("world", 'x');
        assert_eq!(b.as_slice(), b"w\x00o\x00r\x00l\x00d\x00");

        // Shorter replacement: the rest is padded.
        let mut b = Vec::from(&b"h\x00e\x00l\x00l\x00o\x00"[..]);
        let s = WStr::from_utf16le_mut(b.as_mut_slice()).unwrap();
        s.swap_content("hey", 'x');
        assert_eq!(b.as_slice(), b"h\x00e\x00y\x00x\x00x\x00");

        // Longer replacement: it is cut off.
        let mut b = Vec::from(&b"h\x00e\x00y\x00"[..]);
        let s = WStr::from_utf16le_mut(b.as_mut_slice()).unwrap();
        s.swap_content("world", 'x');
        assert_eq!(b.as_slice(), b"w\x00o\x00r\x00");

        // A surrogate pair that does not fit completely is replaced by padding.
        let mut b = Vec::from(&b"h\x00e\x00y\x00"[..]);
        let s = WStr::from_utf16le_mut(b.as_mut_slice()).unwrap();
        s.swap_content("yo\u{10000}", 'x');
        assert_eq!(b.as_slice(), b"y\x00o\x00x\x00");
    }

    #[test]
    #[should_panic]
    fn test_swap_content_wstr_panic() {
        // A padding character that needs a surrogate pair is rejected.
        let mut b = Vec::from(&b"h\x00e\x00y\x00"[..]);
        let s = WStr::from_utf16le_mut(b.as_mut_slice()).unwrap();
        s.swap_content("yo", '\u{10000}');
    }

    #[test]
    #[allow(clippy::trivial_regex)]
    fn test_get_wstr_match() {
        let s = "hello there";
        let mut b = Vec::from(&b"h\x00e\x00l\x00l\x00o\x00 \x00t\x00h\x00e\x00r\x00e\x00"[..]);
        let w = WStr::from_utf16le_mut(b.as_mut_slice()).unwrap();

        // Match at the start of the text.
        let re = Regex::new("hello").unwrap();
        let re_match = re.find(s).unwrap();
        let m = get_wstr_match(s, re_match, w);
        assert_eq!(m.as_bytes(), b"h\x00e\x00l\x00l\x00o\x00");

        // Match covering the whole text.
        let re = Regex::new(".*").unwrap();
        let re_match = re.find(s).unwrap();
        let m = get_wstr_match(s, re_match, w);
        assert_eq!(
            m.as_bytes(),
            b"h\x00e\x00l\x00l\x00o\x00 \x00t\x00h\x00e\x00r\x00e\x00"
        );
    }
}