document_pii/
item_collector.rs1use std::collections::{BTreeMap, BTreeSet, HashMap};
6use std::fs::{self, DirEntry};
7use std::io::BufRead;
8use std::path::{Path, PathBuf};
9
10use anyhow::anyhow;
11use syn::punctuated::Punctuated;
12use syn::visit::Visit;
13use syn::{ItemEnum, ItemStruct, UseTree};
14
15use crate::EnumOrStruct;
16use crate::pii_finder::{FieldsWithAttribute, PiiFinder};
17
/// Result of scanning a set of source files: every collected struct/enum plus
/// the paths recorded as being in scope for each module.
pub struct TypesAndScopedPaths {
    /// Maps the full path of a struct or enum (`<module path>::<ident>`) to its AST node.
    pub all_types: HashMap<String, EnumOrStruct>,
    /// Maps a module path to the set of paths recorded for that module: the
    /// structs/enums it defines and the `use` paths kept by the collector.
    pub scoped_paths: BTreeMap<String, BTreeSet<String>>,
}
25
26impl TypesAndScopedPaths {
27 pub fn find_pii_fields(
28 &self,
29 type_path: Option<&str>,
30 pii_values: &Vec<String>,
31 ) -> anyhow::Result<BTreeSet<FieldsWithAttribute>> {
32 let fields = match type_path {
33 Some(path) => self.find_pii_fields_of_type(path),
35 None => self.find_pii_fields_of_all_types(),
37 }?;
38
39 Ok(fields
40 .into_iter()
41 .filter(|pii| {
42 pii.has_attribute("pii", Some(pii_values))
43 && (pii.has_attribute("retain", Some(&vec!["true".to_owned()]))
44 || !pii.has_attribute("additional_properties", None))
45 })
46 .collect())
47 }
48
49 fn find_pii_fields_of_type(
51 &self,
52 type_path: &str,
53 ) -> anyhow::Result<BTreeSet<FieldsWithAttribute>> {
54 let mut visitor = PiiFinder::new(type_path, &self.all_types, &self.scoped_paths)?;
55
56 let value = &self
57 .all_types
58 .get(type_path)
59 .ok_or_else(|| anyhow!("Unable to find item with following path: {}", type_path))?;
60
61 match value {
62 EnumOrStruct::Struct(itemstruct) => visitor.visit_item_struct(itemstruct),
63 EnumOrStruct::Enum(itemenum) => visitor.visit_item_enum(itemenum),
64 };
65 Ok(visitor.pii_types)
66 }
67
68 fn find_pii_fields_of_all_types(&self) -> anyhow::Result<BTreeSet<FieldsWithAttribute>> {
70 let mut pii_types = BTreeSet::new();
71
72 for type_path in self.all_types.keys() {
73 pii_types.extend(self.find_pii_fields_of_type(type_path)?);
74 }
75
76 Ok(pii_types)
77 }
78}
79
/// Visitor state used while walking parsed source files to gather every struct
/// and enum together with the paths in scope for each module.
#[derive(Default)]
pub struct AstItemCollector {
    /// Module path of the file currently being visited; reassigned in
    /// `visit_files` before each file is parsed.
    module_path: String,
    /// Maps the full path of a struct or enum (`<module path>::<ident>`) to its AST node.
    all_types: HashMap<String, EnumOrStruct>,
    /// Maps a module path to the item paths and filtered `use` paths recorded for it.
    scoped_paths: BTreeMap<String, BTreeSet<String>>,
}
89
90impl AstItemCollector {
91 fn insert_scoped_paths(&mut self, use_statements: Vec<String>) {
92 self.scoped_paths
93 .entry(self.module_path.clone())
94 .or_default()
95 .extend(use_statements);
96 }
97
98 pub fn collect(paths: &[PathBuf]) -> anyhow::Result<TypesAndScopedPaths> {
102 let mut visitor = Self::default();
103
104 visitor.visit_files(paths)?;
105
106 Ok(TypesAndScopedPaths {
107 all_types: visitor.all_types,
108 scoped_paths: visitor.scoped_paths,
109 })
110 }
111
112 fn visit_files(&mut self, paths: &[PathBuf]) -> anyhow::Result<()> {
113 for path in paths {
114 self.module_path = module_name_from_file(path)?;
115
116 let syntax_tree: syn::File = {
117 let file_content = fs::read_to_string(path.as_path())?;
118 syn::parse_file(&file_content)?
119 };
120
121 self.visit_file(&syntax_tree);
122 }
123 Ok(())
124 }
125}
126
127impl<'ast> Visit<'ast> for AstItemCollector {
128 fn visit_item_struct(&mut self, node: &'ast ItemStruct) {
129 let struct_name = format!("{}::{}", self.module_path, node.ident);
130 self.insert_scoped_paths(vec![struct_name.clone()]);
131 self.all_types
132 .insert(struct_name, EnumOrStruct::Struct(node.clone()));
133 }
134
135 fn visit_item_enum(&mut self, node: &'ast ItemEnum) {
136 let enum_name = format!("{}::{}", self.module_path, node.ident);
137 self.insert_scoped_paths(vec![enum_name.clone()]);
138 self.all_types
139 .insert(enum_name, EnumOrStruct::Enum(node.clone()));
140 }
141
142 fn visit_item_use(&mut self, i: &'ast syn::ItemUse) {
143 let use_statements = usetree_to_paths(&i.tree, &self.module_path)
144 .iter()
145 .filter(|s| s.contains("relay"))
146 .cloned()
147 .collect();
148
149 self.insert_scoped_paths(use_statements);
150 }
151}
152
/// Turns a path rendered from a `use` tree into an absolute, `::`-separated path.
///
/// * Spaces are removed and `-` is replaced by `_`.
/// * A leading `crate` segment is replaced by `crate_root`.
/// * Each leading `super` segment moves one module up from `module_path`; the
///   remaining segments are appended to the resulting parent module. More
///   `super` segments than `module_path` has modules leave an empty prefix.
///
/// Resolution works on whole path segments, so identifiers that merely end in
/// `crate` or `super` (for example `my_crate::Foo`) are left untouched, and the
/// `::` separator between the parent module and the rest of the path is kept.
fn normalize_type_path(path: String, crate_root: &str, module_path: &str) -> String {
    let cleaned = path.replace(' ', "").replace('-', "_");
    let mut segments = cleaned.split("::").peekable();
    let mut resolved: Vec<&str> = Vec::new();

    if segments.peek() == Some(&"super") {
        // Start from the current module and climb one level per `super`.
        resolved.extend(module_path.split("::"));
        while segments.next_if_eq(&"super").is_some() {
            resolved.pop();
        }
    } else if segments.next_if_eq(&"crate").is_some() {
        resolved.push(crate_root);
    }

    resolved.extend(segments);
    resolved.join("::")
}
169
170fn usetree_to_paths(use_tree: &UseTree, module_path: &str) -> Vec<String> {
172 let crate_root = module_path.split_once("::").map_or(module_path, |s| s.0);
173 let paths = flatten_use_tree(
174 syn::Path {
175 leading_colon: None,
176 segments: Punctuated::new(),
177 },
178 use_tree,
179 );
180
181 paths
182 .into_iter()
183 .map(|path| normalize_type_path(path, crate_root, module_path))
184 .collect()
185}
186
187fn flatten_use_tree(mut leading_path: syn::Path, use_tree: &UseTree) -> Vec<String> {
192 match use_tree {
193 UseTree::Path(use_path) => {
194 leading_path.segments.push(use_path.ident.clone().into());
195 flatten_use_tree(leading_path, &use_path.tree)
196 }
197 UseTree::Name(use_name) => {
198 leading_path.segments.push(use_name.ident.clone().into());
199 vec![quote::quote!(#leading_path).to_string()]
200 }
201 UseTree::Group(use_group) => {
202 let mut paths = Vec::new();
203 for item in &use_group.items {
204 paths.extend(flatten_use_tree(leading_path.clone(), item));
205 }
206 paths
207 }
208
209 UseTree::Rename(use_rename) => {
210 leading_path.segments.push(use_rename.rename.clone().into());
211 vec![quote::quote!(#leading_path).to_string()]
212 }
213 UseTree::Glob(_) => vec![quote::quote!(#leading_path).to_string()],
215 }
216}
217
218fn crate_name_from_file(file_path: &Path) -> anyhow::Result<String> {
219 let file_str = file_path.to_string_lossy();
223
224 let src_index = file_str
225 .find("/src/")
226 .or_else(|| file_str.find("\\src\\"))
227 .ok_or_else(|| {
228 anyhow!(
229 "Invalid file path (missing '/src/' or '\\src\\'): {}",
230 file_path.display()
231 )
232 })?;
233
234 let back_index = file_str[..src_index]
235 .rfind('/')
236 .or_else(|| file_str[..src_index].rfind('\\'))
237 .ok_or_else(|| {
238 anyhow!(
239 "Invalid file path (missing separator before '/src/' or '\\src\\'): {}",
240 file_path.display()
241 )
242 })?
243 + 1;
244
245 Ok(file_str
246 .split_at(src_index)
247 .0
248 .split_at(back_index)
249 .1
250 .to_string())
251}
252
253fn add_file_stem_to_module_path(
254 file_path: &Path,
255 module_path: &mut Vec<String>,
256) -> anyhow::Result<()> {
257 let file_stem = file_path
258 .file_stem()
259 .ok_or_else(|| {
260 anyhow!(
261 "Invalid file path (unable to find file stem): {}",
262 file_path.display()
263 )
264 })?
265 .to_string_lossy()
266 .into_owned();
267
268 module_path.push(file_stem);
269 Ok(())
270}
271
272fn module_name_from_file(file_path: &Path) -> anyhow::Result<String> {
276 let mut module_path = file_path
277 .parent()
278 .ok_or_else(|| {
279 anyhow!(
280 "Invalid file path (unable to find parent directory): {}",
281 file_path.display()
282 )
283 })?
284 .components()
285 .map(|part| part.as_os_str().to_string_lossy().into_owned())
286 .filter(|part| part != "src")
287 .collect::<Vec<String>>();
288
289 if is_file_module(file_path)? {
290 add_file_stem_to_module_path(file_path, &mut module_path)?;
291 }
292
293 let crate_name = crate_name_from_file(file_path).unwrap();
294
295 Ok(module_path
297 .iter()
298 .position(|s| s == &crate_name)
299 .map(|index| &module_path[index..])
300 .ok_or_else(|| anyhow!("Couldn't find crate name {}.", crate_name))?
301 .join("::")
302 .replace('-', "_"))
303}
304
305fn is_file_declared_from_mod_file(parent_dir: &Path, file_stem: &str) -> anyhow::Result<bool> {
306 let mod_rs_path = parent_dir.join("mod.rs");
307 if !mod_rs_path.exists() {
308 return Ok(false);
309 }
310 let mod_rs_file: fs::File = fs::File::open(mod_rs_path)?;
313 let reader = std::io::BufReader::new(mod_rs_file);
314
315 for line in reader.lines() {
316 let line = line?;
317 if line.trim().starts_with("pub mod") && line.contains(file_stem) {
318 return Ok(true);
319 }
320 }
321 Ok(false)
322}
323
324fn is_file_declared_from_other_file(
325 entry: &DirEntry,
326 file_stem: &str,
327 file_path: &Path,
328) -> anyhow::Result<bool> {
329 let path = entry.path();
330
331 if path.is_file() && path.extension().is_some_and(|ext| ext == "rs") && path != *file_path {
332 let file = fs::File::open(path)?;
334 let reader = std::io::BufReader::new(file);
335
336 for line in reader.lines() {
337 let line = line?;
338 if line.trim().starts_with("pub mod") && line.contains(file_stem) {
339 return Ok(true);
340 }
341 }
342 }
343 Ok(false)
344}
345
346fn is_file_module(file_path: &Path) -> anyhow::Result<bool> {
348 let parent_dir = file_path
349 .parent()
350 .ok_or_else(|| anyhow!("Invalid file path: {}", file_path.display()))?;
351 let file_stem = file_path
352 .file_stem()
353 .ok_or_else(|| anyhow!("Invalid file path: {}", file_path.display()))?
354 .to_string_lossy();
355
356 if is_file_declared_from_mod_file(parent_dir, &file_stem)? {
357 return Ok(true);
358 }
359
360 for entry in fs::read_dir(parent_dir)? {
361 let entry = entry?;
362 if is_file_declared_from_other_file(&entry, &file_stem, file_path)? {
363 return Ok(true);
364 }
365 }
366
367 Ok(false)
368}