relay_event_schema/processor/
chunks.rs

1//! Utilities for dealing with annotated strings.
2//!
3//! This module contains the `split_chunks` and `join_chunks` functions to destructure and recombine
4//! strings by redaction remarks. This allows quick inspection of modified sections of a string.
5//!
6//! ### Example
7//!
8//! ```
9//! use relay_event_schema::processor;
10//! use relay_protocol::{Meta, Remark, RemarkType};
11//!
12//! let remarks = vec![Remark::with_range(
13//!     RemarkType::Substituted,
14//!     "myrule",
15//!     (7, 17),
16//! )];
17//!
18//! let chunks = processor::split_chunks("Hello, [redacted]!", &remarks);
19//! let (joined, join_remarks) = processor::join_chunks(chunks);
20//!
21//! assert_eq!(joined, "Hello, [redacted]!");
22//! assert_eq!(join_remarks, remarks);
23//! ```
24
25use std::borrow::Cow;
26use std::fmt;
27
28use relay_protocol::{Meta, Remark, RemarkType};
29use serde::{Deserialize, Serialize};
30
31/// A type for dealing with chunks of annotated text.
32#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
33#[serde(tag = "type", rename_all = "lowercase")]
34pub enum Chunk<'a> {
35    /// Unmodified text chunk.
36    Text {
37        /// The text value of the chunk
38        text: Cow<'a, str>,
39    },
40    /// Redacted text chunk with a note.
41    Redaction {
42        /// The redacted text value
43        text: Cow<'a, str>,
44        /// The rule that crated this redaction
45        rule_id: Cow<'a, str>,
46        /// Type type of remark for this redaction
47        #[serde(rename = "remark")]
48        ty: RemarkType,
49    },
50}
51
52impl Chunk<'_> {
53    /// The text of this chunk.
54    pub fn as_str(&self) -> &str {
55        match self {
56            Chunk::Text { text } => text,
57            Chunk::Redaction { text, .. } => text,
58        }
59    }
60
61    /// Effective length of the text in this chunk.
62    pub fn len(&self) -> usize {
63        self.as_str().len()
64    }
65
66    /// The number of UTF-8 encoded Unicode codepoints in this chunk.
67    pub fn count(&self) -> usize {
68        bytecount::num_chars(self.as_str().as_bytes())
69    }
70
71    /// Determines whether this chunk is empty.
72    pub fn is_empty(&self) -> bool {
73        self.len() == 0
74    }
75}
76
77impl fmt::Display for Chunk<'_> {
78    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
79        write!(f, "{}", self.as_str())
80    }
81}
82
83/// Chunks the given text based on remarks.
84pub fn split_chunks<'a, I>(text: &'a str, remarks: I) -> Vec<Chunk<'a>>
85where
86    I: IntoIterator<Item = &'a Remark>,
87{
88    let mut rv = vec![];
89    let mut pos = 0;
90
91    for remark in remarks {
92        let (from, to) = match remark.range() {
93            Some(range) => *range,
94            None => continue,
95        };
96
97        if from > pos {
98            if let Some(piece) = text.get(pos..from) {
99                rv.push(Chunk::Text {
100                    text: Cow::Borrowed(piece),
101                });
102            } else {
103                break;
104            }
105        }
106        if let Some(piece) = text.get(from..to) {
107            rv.push(Chunk::Redaction {
108                text: Cow::Borrowed(piece),
109                rule_id: remark.rule_id().into(),
110                ty: remark.ty(),
111            });
112        } else {
113            break;
114        }
115        pos = to;
116    }
117
118    if pos < text.len() {
119        if let Some(piece) = text.get(pos..) {
120            rv.push(Chunk::Text {
121                text: Cow::Borrowed(piece),
122            });
123        }
124    }
125
126    rv
127}
128
129/// Concatenates chunks into a string and emits remarks for redacted sections.
130pub fn join_chunks<'a, I>(chunks: I) -> (String, Vec<Remark>)
131where
132    I: IntoIterator<Item = Chunk<'a>>,
133{
134    let mut rv = String::new();
135    let mut remarks = vec![];
136    let mut pos = 0;
137
138    for chunk in chunks {
139        let new_pos = pos + chunk.len();
140        rv.push_str(chunk.as_str());
141
142        match chunk {
143            Chunk::Redaction { rule_id, ty, .. } => {
144                remarks.push(Remark::with_range(ty, rule_id.clone(), (pos, new_pos)))
145            }
146            Chunk::Text { .. } => {
147                // Plain text segments do not need remarks
148            }
149        }
150
151        pos = new_pos;
152    }
153
154    (rv, remarks)
155}
156
157/// Splits the string into chunks, maps each chunk and then joins chunks again, emitting
158/// remarks along the process.
159pub fn process_chunked_value<F>(value: &mut String, meta: &mut Meta, f: F)
160where
161    F: FnOnce(Vec<Chunk>) -> Vec<Chunk>,
162{
163    let chunks = split_chunks(value, meta.iter_remarks());
164    let (new_value, remarks) = join_chunks(f(chunks));
165
166    if new_value != *value {
167        meta.clear_remarks();
168        for remark in remarks.into_iter() {
169            meta.add_remark(remark);
170        }
171        meta.set_original_length(Some(bytecount::num_chars(value.as_bytes())));
172        *value = new_value;
173    }
174}
175
#[cfg(test)]
mod tests {
    use similar_asserts::assert_eq;

    use super::*;

    /// Splitting a masked string yields text/redaction/text chunks, and joining them restores
    /// both the original string and the original remarks.
    #[test]
    fn test_chunk_split() {
        let rule = "@email:strip";
        let input = "Hello Peter, my email address is ****@*****.com. See you";

        let remarks = vec![Remark::with_range(RemarkType::Masked, rule, (33, 47))];

        let expected = vec![
            Chunk::Text {
                text: Cow::Borrowed("Hello Peter, my email address is "),
            },
            Chunk::Redaction {
                text: Cow::Borrowed("****@*****.com"),
                rule_id: Cow::Borrowed(rule),
                ty: RemarkType::Masked,
            },
            Chunk::Text {
                text: Cow::Borrowed(". See you"),
            },
        ];

        assert_eq!(split_chunks(input, &remarks), expected);
        assert_eq!(join_chunks(expected), (input.into(), remarks));
    }
}