relay_event_schema/processor/
chunks.rs1use std::borrow::Cow;
26use std::fmt;
27
28use relay_protocol::{Meta, Remark, RemarkType};
29use serde::{Deserialize, Serialize};
30
31#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
33#[serde(tag = "type", rename_all = "lowercase")]
34pub enum Chunk<'a> {
35 Text {
37 text: Cow<'a, str>,
39 },
40 Redaction {
42 text: Cow<'a, str>,
44 rule_id: Cow<'a, str>,
46 #[serde(rename = "remark")]
48 ty: RemarkType,
49 },
50}
51
52impl Chunk<'_> {
53 pub fn as_str(&self) -> &str {
55 match self {
56 Chunk::Text { text } => text,
57 Chunk::Redaction { text, .. } => text,
58 }
59 }
60
61 pub fn len(&self) -> usize {
63 self.as_str().len()
64 }
65
66 pub fn count(&self) -> usize {
68 bytecount::num_chars(self.as_str().as_bytes())
69 }
70
71 pub fn is_empty(&self) -> bool {
73 self.len() == 0
74 }
75}
76
77impl fmt::Display for Chunk<'_> {
78 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
79 write!(f, "{}", self.as_str())
80 }
81}
82
83pub fn split_chunks<'a, I>(text: &'a str, remarks: I) -> Vec<Chunk<'a>>
85where
86 I: IntoIterator<Item = &'a Remark>,
87{
88 let mut rv = vec![];
89 let mut pos = 0;
90
91 for remark in remarks {
92 let (from, to) = match remark.range() {
93 Some(range) => *range,
94 None => continue,
95 };
96
97 if from > pos {
98 if let Some(piece) = text.get(pos..from) {
99 rv.push(Chunk::Text {
100 text: Cow::Borrowed(piece),
101 });
102 } else {
103 break;
104 }
105 }
106 if let Some(piece) = text.get(from..to) {
107 rv.push(Chunk::Redaction {
108 text: Cow::Borrowed(piece),
109 rule_id: remark.rule_id().into(),
110 ty: remark.ty(),
111 });
112 } else {
113 break;
114 }
115 pos = to;
116 }
117
118 if pos < text.len() {
119 if let Some(piece) = text.get(pos..) {
120 rv.push(Chunk::Text {
121 text: Cow::Borrowed(piece),
122 });
123 }
124 }
125
126 rv
127}
128
129pub fn join_chunks<'a, I>(chunks: I) -> (String, Vec<Remark>)
131where
132 I: IntoIterator<Item = Chunk<'a>>,
133{
134 let mut rv = String::new();
135 let mut remarks = vec![];
136 let mut pos = 0;
137
138 for chunk in chunks {
139 let new_pos = pos + chunk.len();
140 rv.push_str(chunk.as_str());
141
142 match chunk {
143 Chunk::Redaction { rule_id, ty, .. } => {
144 remarks.push(Remark::with_range(ty, rule_id.clone(), (pos, new_pos)))
145 }
146 Chunk::Text { .. } => {
147 }
149 }
150
151 pos = new_pos;
152 }
153
154 (rv, remarks)
155}
156
157pub fn process_chunked_value<F>(value: &mut String, meta: &mut Meta, f: F)
160where
161 F: FnOnce(Vec<Chunk>) -> Vec<Chunk>,
162{
163 let chunks = split_chunks(value, meta.iter_remarks());
164 let (new_value, remarks) = join_chunks(f(chunks));
165
166 if new_value != *value {
167 meta.clear_remarks();
168 for remark in remarks.into_iter() {
169 meta.add_remark(remark);
170 }
171 meta.set_original_length(Some(bytecount::num_chars(value.as_bytes())));
172 *value = new_value;
173 }
174}
175
#[cfg(test)]
mod tests {
    use similar_asserts::assert_eq;

    use super::*;

    /// Splitting a masked string and joining the resulting chunks must yield
    /// the original text and remarks again.
    #[test]
    fn test_chunk_split() {
        let input = "Hello Peter, my email address is ****@*****.com. See you";

        // The masked e-mail address occupies bytes 33..47 of `input`.
        let expected_remarks = vec![Remark::with_range(
            RemarkType::Masked,
            "@email:strip",
            (33, 47),
        )];

        let expected_chunks = vec![
            Chunk::Text {
                text: "Hello Peter, my email address is ".into(),
            },
            Chunk::Redaction {
                text: "****@*****.com".into(),
                rule_id: "@email:strip".into(),
                ty: RemarkType::Masked,
            },
            Chunk::Text {
                text: ". See you".into(),
            },
        ];

        assert_eq!(split_chunks(input, &expected_remarks), expected_chunks);
        assert_eq!(
            join_chunks(expected_chunks),
            (String::from(input), expected_remarks)
        );
    }
}