document_pii/
main.rs

1#![doc(
2    html_logo_url = "https://raw.githubusercontent.com/getsentry/relay/master/artwork/relay-icon.png",
3    html_favicon_url = "https://raw.githubusercontent.com/getsentry/relay/master/artwork/relay-icon.png"
4)]
5
6use std::collections::BTreeSet;
7use std::fs::File;
8use std::path::PathBuf;
9
10use clap::{Parser, command};
11use serde::Serialize;
12use syn::{ItemEnum, ItemStruct};
13use walkdir::WalkDir;
14
15use crate::item_collector::AstItemCollector;
16use crate::pii_finder::FieldsWithAttribute;
17
18pub mod item_collector;
19pub mod pii_finder;
20
/// Structs and Enums are the only items that are relevant for finding PII fields.
///
/// Each variant wraps the corresponding `syn` AST node.
#[derive(Clone)]
pub enum EnumOrStruct {
    /// AST node of a struct item.
    Struct(ItemStruct),
    /// AST node of an enum item.
    Enum(ItemEnum),
}
27
28/// Gets all the .rs files in a given rust crate/workspace.
29fn find_rs_files(dir: &PathBuf) -> Vec<std::path::PathBuf> {
30    let walker = WalkDir::new(dir).into_iter();
31    let mut rs_files = Vec::new();
32
33    for entry in walker.filter_map(walkdir::Result::ok) {
34        if !entry.path().to_string_lossy().contains("src") {
35            continue;
36        }
37        if entry.file_type().is_file() && entry.path().extension().is_some_and(|ext| ext == "rs") {
38            rs_files.push(entry.into_path());
39        }
40    }
41    rs_files
42}
43
44/// Prints documentation for metrics.
45#[derive(Debug, Parser, Default)]
46#[command(verbatim_doc_comment)]
47pub struct Cli {
48    /// Optional output path. By default, documentation is printed on stdout.
49    #[arg(short, long)]
50    pub output: Option<PathBuf>,
51
52    /// Path to the rust crate/workspace.
53    #[arg(short, long)]
54    pub path: Option<PathBuf>,
55
56    /// The struct or enum of which you want to find all PII fields. Checks all items if none is
57    /// provided.
58    #[arg(short, long)]
59    pub item: Option<String>,
60
61    /// Vector of which PII-values should be looked for, options are: "true, maybe, false".
62    #[arg(long, default_value = "true")]
63    pub pii_values: Vec<String>,
64}
65
66impl Cli {
67    pub fn run(self) -> anyhow::Result<()> {
68        // User must either provide the path to a rust crate/workspace or be in one when calling this script.
69        let path = match self.path.clone() {
70            Some(path) => {
71                if !path.join("Cargo.toml").exists() {
72                    anyhow::bail!("Please provide the path to a rust crate/workspace");
73                }
74                path
75            }
76            None => std::env::current_dir()?,
77        };
78
79        // Before we can iterate over the PII fields properly, we make a mapping between all
80        // paths to types and their AST node, and of all modules and the items in their scope.
81        let types_and_use_statements = {
82            let rust_file_paths = find_rs_files(&path);
83            AstItemCollector::collect(&rust_file_paths)?
84        };
85
86        let pii_types =
87            types_and_use_statements.find_pii_fields(self.item.as_deref(), &self.pii_values)?;
88
89        // Function also takes a string to replace unnamed fields, for now we just remove them.
90        let output_vec = Output::from_btreeset(pii_types);
91
92        match self.output {
93            Some(ref path) => serde_json::to_writer_pretty(File::create(path)?, &output_vec)?,
94            None => serde_json::to_writer_pretty(std::io::stdout(), &output_vec)?,
95        };
96
97        Ok(())
98    }
99}
100
101#[derive(Serialize, Default, Debug)]
102struct Output {
103    path: String,
104    additional_properties: bool,
105}
106
107impl Output {
108    fn new(pii_type: FieldsWithAttribute) -> Self {
109        let mut output = Self {
110            additional_properties: pii_type.attributes.contains_key("additional_properties"),
111            ..Default::default()
112        };
113
114        output
115            .path
116            .push_str(&pii_type.type_and_fields[0].qualified_type_name);
117
118        let mut iter = pii_type.type_and_fields.iter().peekable();
119        while let Some(path) = iter.next() {
120            // If field has attribute "additional_properties" it means it's not a real field
121            // but represents unstrucutred data. So we remove it and pass the information as a boolean
122            // in order to properly document this fact in the docs.
123            if !(output.additional_properties && iter.peek().is_none()) {
124                output.path.push_str(&format!(".{}", path.field_ident));
125            }
126        }
127
128        output.path = output.path.replace("{{Unnamed}}.", "");
129        output
130    }
131
132    /// Represent the PII fields in a format that will be used in the final output.
133    fn from_btreeset(pii_types: BTreeSet<FieldsWithAttribute>) -> Vec<Self> {
134        let mut output_vec = vec![];
135        for pii in pii_types {
136            output_vec.push(Output::new(pii));
137        }
138        output_vec.sort_by(|a, b| a.path.cmp(&b.path));
139
140        output_vec
141    }
142}
143
144fn print_error(error: &anyhow::Error) {
145    eprintln!("Error: {error}");
146
147    let mut cause = error.source();
148    while let Some(ref e) = cause {
149        eprintln!("  caused by: {e}");
150        cause = e.source();
151    }
152}
153
154fn main() {
155    let cli = Cli::parse();
156
157    match cli.run() {
158        Ok(()) => (),
159        Err(error) => {
160            print_error(&error);
161            std::process::exit(1);
162        }
163    }
164}
165
#[cfg(test)]
mod tests {
    use path_slash::PathBufExt;

    use crate::item_collector::TypesAndScopedPaths;

    use super::*;

    /// Location of the crate that is used as fixture, given with forward slashes.
    const RUST_TEST_CRATE: &str = "../../tests/test_pii_docs";

    /// Collects the types and scoped paths of all rust files found in the test crate.
    fn get_types_and_use_statements() -> TypesAndScopedPaths {
        let rust_crate = PathBuf::from_slash(RUST_TEST_CRATE);
        let rust_file_paths = find_rs_files(&rust_crate);
        AstItemCollector::collect(&rust_file_paths).unwrap()
    }

    // On windows the assert fails because of how file paths are different there.
    #[cfg(not(target_os = "windows"))]
    #[test]
    fn test_find_rs_files() {
        let rust_crate = PathBuf::from_slash(RUST_TEST_CRATE);
        let mut rust_file_paths = find_rs_files(&rust_crate);
        // Sort the paths so the snapshot does not depend on the order of the directory walk.
        rust_file_paths.sort_unstable();
        insta::assert_debug_snapshot!(rust_file_paths);
    }

    /// Snapshot of the fields found with pii value "true" when a single item is requested.
    #[test]
    fn test_single_type() {
        let types_and_use_statements = get_types_and_use_statements();

        let pii_types = types_and_use_statements
            .find_pii_fields(Some("test_pii_docs::SubStruct"), &vec!["true".to_owned()])
            .unwrap();

        let output = Output::from_btreeset(pii_types);
        insta::assert_debug_snapshot!(output);
    }

    /// Snapshot of the scoped paths that were collected from the test crate.
    #[test]
    fn test_scoped_paths() {
        let types_and_use_statements = get_types_and_use_statements();

        let TypesAndScopedPaths { scoped_paths, .. } = types_and_use_statements;
        insta::assert_debug_snapshot!(scoped_paths);
    }

    /// Snapshot of the fields found with pii value "true" when all items are checked.
    #[test]
    fn test_pii_true() {
        let types_and_use_statements = get_types_and_use_statements();

        let pii_types = types_and_use_statements
            .find_pii_fields(None, &vec!["true".to_owned()])
            .unwrap();

        let output = Output::from_btreeset(pii_types);
        insta::assert_debug_snapshot!(output);
    }

    /// Snapshot of the fields found with pii value "false" when all items are checked.
    #[test]
    fn test_pii_false() {
        let types_and_use_statements = get_types_and_use_statements();

        let pii_types = types_and_use_statements
            .find_pii_fields(None, &vec!["false".to_owned()])
            .unwrap();

        let output = Output::from_btreeset(pii_types);
        insta::assert_debug_snapshot!(output);
    }

    /// Snapshot of the fields found when all of "true", "false" and "maybe" are requested.
    #[test]
    fn test_pii_all() {
        let types_and_use_statements = get_types_and_use_statements();

        let pii_types = types_and_use_statements
            .find_pii_fields(
                None,
                &vec!["true".to_owned(), "false".to_owned(), "maybe".to_owned()],
            )
            .unwrap();

        let output = Output::from_btreeset(pii_types);
        insta::assert_debug_snapshot!(output);
    }

    #[test]
    fn test_pii_retain_additional_properties_truth_table()
    /*
    Fields should be chosen if there is a pii match, and either retain = "true", or there's no
    "additional_properties" attribute.
    Logic: ((pii match) & (retain = "true" | !additional_properties))

    truth table:

    +-----------+-----------------+------------------------+----------+
    | pii match | retain = "true" | !additional_properties | selected |
    +-----------+-----------------+------------------------+----------+
    | True      | True            | True                   | True     |
    | True      | True            | False                  | True     |
    | True      | False           | True                   | True     |
    | True      | False           | False                  | False    |
    | False     | True            | True                   | False    |
    | False     | True            | False                  | False    |
    | False     | False           | True                   | False    |
    | False     | False           | False                  | False    |
    +-----------+-----------------+------------------------+----------+

    NOTE(review): the "selected" column follows the logic stated above; the actual selection is
    implemented outside of this file, confirm against `find_pii_fields`.
     */
    {
        let types_and_use_statements = get_types_and_use_statements();

        // Presumably "truth_table_test" is a pii value that is only used by the fixture fields
        // covering the combinations above -- verify against the test crate.
        let pii_types = types_and_use_statements
            .find_pii_fields(None, &vec!["truth_table_test".to_owned()])
            .unwrap();

        let output = Output::from_btreeset(pii_types);
        insta::assert_debug_snapshot!(output);
    }
}