xref: /aosp_15_r20/development/tools/cargo_embargo/src/cargo/cargo_out.rs (revision 90c8c64db3049935a07c6143d7fd006e26f8ecca)
1 // Copyright (C) 2022 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 use super::metadata::WorkspaceMetadata;
16 use super::{Crate, CrateType, Extern, ExternType};
17 use crate::CargoOutput;
18 use anyhow::anyhow;
19 use anyhow::bail;
20 use anyhow::Context;
21 use anyhow::Result;
22 use log::debug;
23 use regex::Regex;
24 use std::collections::BTreeMap;
25 use std::env;
26 use std::path::Path;
27 use std::path::PathBuf;
28 use std::sync::LazyLock;
29 
30 /// Reads the given `cargo.out` and `cargo.metadata` files, and generates a list of crates based on
31 /// the rustc invocations.
32 ///
33 /// Ignores crates outside the current directory and build script crates.
parse_cargo_out(cargo_output: &CargoOutput) -> Result<Vec<Crate>>34 pub fn parse_cargo_out(cargo_output: &CargoOutput) -> Result<Vec<Crate>> {
35     let metadata = serde_json::from_str(&cargo_output.cargo_metadata)
36         .context("failed to parse cargo metadata")?;
37     parse_cargo_out_str(
38         &cargo_output.cargo_out,
39         &metadata,
40         env::current_dir().unwrap().canonicalize().unwrap(),
41     )
42 }
43 
44 /// Parses the given `cargo.out` and `cargo.metadata` file contents and generates a list of crates
45 /// based on the rustc invocations.
46 ///
47 /// Ignores crates outside `base_directory` and build script crates.
parse_cargo_out_str( cargo_out: &str, metadata: &WorkspaceMetadata, base_directory: impl AsRef<Path>, ) -> Result<Vec<Crate>>48 fn parse_cargo_out_str(
49     cargo_out: &str,
50     metadata: &WorkspaceMetadata,
51     base_directory: impl AsRef<Path>,
52 ) -> Result<Vec<Crate>> {
53     let cargo_out = CargoOut::parse(cargo_out).context("failed to parse cargo.out")?;
54     debug!("Parsed cargo output: {:?}", cargo_out);
55 
56     assert!(cargo_out.cc_invocations.is_empty(), "cc not supported yet");
57     assert!(cargo_out.ar_invocations.is_empty(), "ar not supported yet");
58 
59     let mut raw_names = BTreeMap::new();
60     for rustc in cargo_out.rustc_invocations.iter() {
61         raw_name_from_rustc_invocation(rustc, &mut raw_names)
62     }
63 
64     let mut crates = Vec::new();
65     for rustc in cargo_out.rustc_invocations.iter() {
66         let c = Crate::from_rustc_invocation(rustc, metadata, &cargo_out.tests, &raw_names)
67             .with_context(|| format!("failed to process rustc invocation: {rustc}"))?;
68         // Ignore build.rs crates.
69         if c.name.starts_with("build_script_") {
70             continue;
71         }
72         // Ignore crates outside the base directory.
73         if !c.package_dir.starts_with(&base_directory) {
74             continue;
75         }
76         crates.push(c);
77     }
78     crates.dedup();
79     Ok(crates)
80 }
81 
/// Splits a rustc command line into its individual arguments.
///
/// Arguments are separated by ASCII whitespace. An argument that starts with a single or double
/// quote runs up to the next matching quote, may contain whitespace, and is returned without
/// the surrounding quotes. Quotes appearing in the middle of an unquoted argument are kept
/// verbatim.
///
/// Panics if a quoted argument is not terminated.
fn args_from_rustc_invocation(rustc: &str) -> Vec<&str> {
    let mut tokens = Vec::new();
    let mut cursor = rustc.char_indices();
    // The cursor is advanced both here and while scanning for the end of each token, so a
    // plain `for` loop cannot be used.
    while let Some((begin, first)) = cursor.next() {
        if first == '\'' || first == '"' {
            let closing = cursor.find(|&(_, ch)| ch == first);
            let (stop, _) = if first == '\'' {
                closing.expect("Missing closing single quote")
            } else {
                closing.expect("Missing closing double quote")
            };
            // Both quote characters are one byte long, so `begin + 1` is the first byte of
            // the quoted text.
            tokens.push(&rustc[begin + 1..stop]);
        } else if !first.is_ascii_whitespace() {
            // An unquoted token ends at the next whitespace character or at the end of input.
            let stop = cursor
                .find(|&(_, ch)| ch.is_ascii_whitespace())
                .map_or(rustc.len(), |(index, _)| index);
            tokens.push(&rustc[begin..stop]);
        }
    }
    tokens
}
110 
111 /// Parse out the path name for a crate from a rustc invocation
raw_name_from_rustc_invocation(rustc: &str, raw_names: &mut BTreeMap<String, String>)112 fn raw_name_from_rustc_invocation(rustc: &str, raw_names: &mut BTreeMap<String, String>) {
113     let mut crate_name = String::new();
114     // split into args
115     let mut arg_iter = args_from_rustc_invocation(rustc).into_iter();
116     // look for the crate name and a string ending in .rs and check whether the
117     // path string contains a kebab-case version of the crate name
118     while let Some(arg) = arg_iter.next() {
119         match arg {
120             "--crate-name" => crate_name = arg_iter.next().unwrap().to_string(),
121             _ if arg.ends_with(".rs") => {
122                 assert_ne!(crate_name, "", "--crate-name option should precede input");
123                 let snake_case_arg = arg.replace('-', "_");
124                 if let Some(idx) = snake_case_arg.rfind(&crate_name) {
125                     let raw_name = arg[idx..idx + crate_name.len()].to_string();
126                     if crate_name != raw_name {
127                         raw_names.insert(crate_name, raw_name);
128                     }
129                 }
130                 break;
131             }
132             _ => {}
133         }
134     }
135 }
136 
/// Summary of what a test target contains, as reported by `cargo test -- --list`.
#[derive(Debug)]
struct TestContents {
    /// True if the listed number of tests was non-zero.
    tests: bool,
    /// True if the listed number of benchmarks was non-zero.
    benchmarks: bool,
}
143 
144 /// Raw-ish data extracted from cargo.out file.
145 #[derive(Debug, Default)]
146 struct CargoOut {
147     rustc_invocations: Vec<String>,
148 
149     // package name => cmd args
150     cc_invocations: BTreeMap<String, String>,
151     ar_invocations: BTreeMap<String, String>,
152 
153     // lines starting with "warning: ".
154     // line number => line
155     warning_lines: BTreeMap<usize, String>,
156     warning_files: Vec<String>,
157 
158     // output filename => test filename => whether it contains any tests or benchmarks
159     tests: BTreeMap<String, BTreeMap<PathBuf, TestContents>>,
160 
161     errors: Vec<String>,
162     test_errors: Vec<String>,
163 }
164 
match1(regex: &Regex, s: &str) -> Option<String>165 fn match1(regex: &Regex, s: &str) -> Option<String> {
166     regex.captures(s).and_then(|x| x.get(1)).map(|x| x.as_str().to_string())
167 }
168 
match3(regex: &Regex, s: &str) -> Option<(String, String, String)>169 fn match3(regex: &Regex, s: &str) -> Option<(String, String, String)> {
170     regex.captures(s).and_then(|x| match (x.get(1), x.get(2), x.get(3)) {
171         (Some(a), Some(b), Some(c)) => {
172             Some((a.as_str().to_string(), b.as_str().to_string(), c.as_str().to_string()))
173         }
174         _ => None,
175     })
176 }
177 
178 impl CargoOut {
179     /// Parse the output of a `cargo build -v` run.
parse(contents: &str) -> Result<CargoOut>180     fn parse(contents: &str) -> Result<CargoOut> {
181         let mut result = CargoOut::default();
182         let mut in_tests = false;
183         let mut cur_test_key = None;
184         let mut lines_iter = contents.lines().enumerate();
185         while let Some((n, line)) = lines_iter.next() {
186             if line.starts_with("warning: ") {
187                 result.warning_lines.insert(n, line.to_string());
188                 continue;
189             }
190 
191             // Cargo -v output of a call to rustc.
192             static RUSTC_REGEX: LazyLock<Regex> =
193                 LazyLock::new(|| Regex::new(r"^ +Running `(?:/[^\s]*/)?rustc (.*)`$").unwrap());
194             if let Some(args) = match1(&RUSTC_REGEX, line) {
195                 result.rustc_invocations.push(args);
196                 continue;
197             }
198             // Cargo -vv output of a call to rustc could be split into multiple lines.
199             // Assume that the first line will contain some CARGO_* env definition.
200             static RUSTC_VV_REGEX: LazyLock<Regex> =
201                 LazyLock::new(|| Regex::new(r"^ +Running `.*CARGO_.*=.*$").unwrap());
202             if RUSTC_VV_REGEX.is_match(line) {
203                 // cargo build -vv output can have multiple lines for a rustc command due to
204                 // '\n' in strings for environment variables.
205                 let mut line = line.to_string();
206                 loop {
207                     // Use an heuristic to detect the completions of a multi-line command.
208                     if line.ends_with('`') && line.chars().filter(|c| *c == '`').count() % 2 == 0 {
209                         break;
210                     }
211                     if let Some((_, next_line)) = lines_iter.next() {
212                         line += next_line;
213                         continue;
214                     }
215                     break;
216                 }
217                 // The combined -vv output rustc command line pattern.
218                 static RUSTC_VV_CMD_ARGS: LazyLock<Regex> = LazyLock::new(|| {
219                     Regex::new(r"^ *Running `.*CARGO_.*=.* (?:/[^\s]*/)?rustc (.*)`$").unwrap()
220                 });
221                 if let Some(args) = match1(&RUSTC_VV_CMD_ARGS, &line) {
222                     result.rustc_invocations.push(args);
223                 } else {
224                     bail!("failed to parse cargo.out line: {}", line);
225                 }
226                 continue;
227             }
228             // Cargo -vv output of a "cc" or "ar" command; all in one line.
229             static CC_AR_VV_REGEX: LazyLock<Regex> = LazyLock::new(|| {
230                 Regex::new(r#"^\[([^ ]*)[^\]]*\] running:? "(?:/[^\s]*/)?(cc|ar)" (.*)$"#).unwrap()
231             });
232             if let Some((pkg, cmd, args)) = match3(&CC_AR_VV_REGEX, line) {
233                 match cmd.as_str() {
234                     "ar" => result.ar_invocations.insert(pkg, args),
235                     "cc" => result.cc_invocations.insert(pkg, args),
236                     _ => unreachable!(),
237                 };
238                 continue;
239             }
240             // Rustc output of file location path pattern for a warning message.
241             static WARNING_FILE_REGEX: LazyLock<Regex> =
242                 LazyLock::new(|| Regex::new(r"^ *--> ([^:]*):[0-9]+").unwrap());
243             if result.warning_lines.contains_key(&n.saturating_sub(1)) {
244                 if let Some(fpath) = match1(&WARNING_FILE_REGEX, line) {
245                     result.warning_files.push(fpath);
246                     continue;
247                 }
248             }
249             if line.starts_with("error: ") || line.starts_with("error[E") {
250                 if in_tests {
251                     result.test_errors.push(line.to_string());
252                 } else {
253                     result.errors.push(line.to_string());
254                 }
255                 continue;
256             }
257             static CARGO2ANDROID_RUNNING_REGEX: LazyLock<Regex> =
258                 LazyLock::new(|| Regex::new(r"^### Running: .*$").unwrap());
259             if CARGO2ANDROID_RUNNING_REGEX.is_match(line) {
260                 in_tests = line.contains("cargo test") && line.contains("--list");
261                 continue;
262             }
263 
264             // `cargo test -- --list` output
265             // Example: Running unittests src/lib.rs (target.tmp/x86_64-unknown-linux-gnu/debug/deps/aarch64-58b675be7dc09833)
266             static CARGO_TEST_LIST_START_PAT: LazyLock<Regex> = LazyLock::new(|| {
267                 Regex::new(r"^\s*Running (?:unittests )?(.*) \(.*/(.*)\)$").unwrap()
268             });
269             static CARGO_TEST_LIST_END_PAT: LazyLock<Regex> =
270                 LazyLock::new(|| Regex::new(r"^(\d+) tests?, (\d+) benchmarks$").unwrap());
271             if let Some(captures) = CARGO_TEST_LIST_START_PAT.captures(line) {
272                 cur_test_key =
273                     Some((captures.get(2).unwrap().as_str(), captures.get(1).unwrap().as_str()));
274             } else if let Some((output_filename, main_src)) = cur_test_key {
275                 if let Some(captures) = CARGO_TEST_LIST_END_PAT.captures(line) {
276                     let num_tests = captures.get(1).unwrap().as_str().parse::<u32>().unwrap();
277                     let num_benchmarks = captures.get(2).unwrap().as_str().parse::<u32>().unwrap();
278                     result.tests.entry(output_filename.to_owned()).or_default().insert(
279                         PathBuf::from(main_src),
280                         TestContents { tests: num_tests != 0, benchmarks: num_benchmarks != 0 },
281                     );
282                     cur_test_key = None;
283                 }
284             }
285         }
286 
287         // self.find_warning_owners()
288 
289         Ok(result)
290     }
291 }
292 
293 impl Crate {
from_rustc_invocation( rustc: &str, metadata: &WorkspaceMetadata, tests: &BTreeMap<String, BTreeMap<PathBuf, TestContents>>, raw_names: &BTreeMap<String, String>, ) -> Result<Crate>294     fn from_rustc_invocation(
295         rustc: &str,
296         metadata: &WorkspaceMetadata,
297         tests: &BTreeMap<String, BTreeMap<PathBuf, TestContents>>,
298         raw_names: &BTreeMap<String, String>,
299     ) -> Result<Crate> {
300         let mut out = Crate::default();
301         let mut extra_filename = String::new();
302 
303         // split into args
304         let mut arg_iter = args_from_rustc_invocation(rustc).into_iter();
305         // process each arg
306         while let Some(arg) = arg_iter.next() {
307             match arg {
308                 "--crate-name" => out.name = arg_iter.next().unwrap().to_string(),
309                 "--crate-type" => out
310                     .types
311                     .push(CrateType::from_str(arg_iter.next().unwrap().to_string().as_str())),
312                 "--test" => out.types.push(CrateType::Test),
313                 "--target" => out.target = Some(arg_iter.next().unwrap().to_string()),
314                 "--cfg" => {
315                     // example: feature=\"sink\"
316                     let arg = arg_iter.next().unwrap();
317                     if let Some(feature) =
318                         arg.strip_prefix("feature=\"").and_then(|s| s.strip_suffix('\"'))
319                     {
320                         out.features.push(feature.to_string());
321                     } else {
322                         out.cfgs.push(arg.to_string());
323                     }
324                 }
325                 "--extern" => {
326                     // example: proc_macro
327                     // example: memoffset=/some/path/libmemoffset-2cfda327d156e680.rmeta
328                     let arg = arg_iter.next().unwrap();
329                     if let Some((name, path)) = arg.split_once('=') {
330                         let filename = path.split('/').last().unwrap();
331 
332                         // Example filename: "libgetrandom-fd8800939535fc59.rmeta" or "libmls_rs_uniffi.rlib".
333                         static REGEX: LazyLock<Regex> = LazyLock::new(|| {
334                             Regex::new(r"^lib([^-]*)(?:-[0-9a-f]*)?.(rlib|so|rmeta)$").unwrap()
335                         });
336 
337                         let Some(lib_name) = REGEX.captures(filename).and_then(|x| x.get(1)) else {
338                             bail!("bad filename for extern {}: {}", name, filename);
339                         };
340                         let extern_type =
341                             if filename.ends_with(".rlib") || filename.ends_with(".rmeta") {
342                                 ExternType::Rust
343                             } else if filename.ends_with(".so") {
344                                 // Assume .so files are always proc_macros. May not always be right.
345                                 ExternType::ProcMacro
346                             } else {
347                                 bail!("Unexpected extension for extern filename {}", filename);
348                             };
349 
350                         let lib_name = lib_name.as_str().to_string();
351                         let raw_name = if let Some(raw_name) = raw_names.get(&lib_name) {
352                             raw_name.to_owned()
353                         } else {
354                             lib_name.clone()
355                         };
356                         out.externs.push(Extern {
357                             name: name.to_string(),
358                             lib_name,
359                             raw_name,
360                             extern_type,
361                         });
362                     } else if arg != "proc_macro" {
363                         panic!("No filename for {}", arg);
364                     }
365                 }
366                 _ if arg.starts_with("-C") => {
367                     // handle both "-Cfoo" and "-C foo"
368                     let arg = if arg == "-C" {
369                         arg_iter.next().unwrap()
370                     } else {
371                         arg.strip_prefix("-C").unwrap()
372                     };
373                     // 'prefer-dynamic' does not work with common flag -C lto
374                     // 'embed-bitcode' is ignored; we might control LTO with other .bp flag
375                     // 'codegen-units' is set in Android global config or by default
376                     //
377                     // TODO: this is business logic. move it out of the parsing code
378                     if !arg.starts_with("codegen-units=")
379                         && !arg.starts_with("debuginfo=")
380                         && !arg.starts_with("embed-bitcode=")
381                         && !arg.starts_with("extra-filename=")
382                         && !arg.starts_with("incremental=")
383                         && !arg.starts_with("metadata=")
384                         && arg != "prefer-dynamic"
385                     {
386                         out.codegens.push(arg.to_string());
387                     }
388                     if let Some(x) = arg.strip_prefix("extra-filename=") {
389                         extra_filename = x.to_string();
390                     }
391                 }
392                 "--cap-lints" => out.cap_lints = arg_iter.next().unwrap().to_string(),
393                 "-l" => {
394                     let arg = arg_iter.next().unwrap();
395                     if let Some(lib) = arg.strip_prefix("static=") {
396                         out.static_libs.push(lib.to_string());
397                     } else if let Some(lib) = arg.strip_prefix("dylib=") {
398                         out.shared_libs.push(lib.to_string());
399                     } else {
400                         out.shared_libs.push(arg.to_string());
401                     }
402                 }
403                 _ if !arg.starts_with('-') => {
404                     (out.package_dir, out.main_src) = split_src_path(Path::new(arg))?;
405                 }
406 
407                 // ignored flags
408                 "-L" => {
409                     arg_iter.next().unwrap();
410                 }
411                 "--out-dir" => {
412                     arg_iter.next().unwrap();
413                 }
414                 "--color" => {
415                     arg_iter.next().unwrap();
416                 }
417                 "--check-cfg" => {
418                     arg_iter.next().unwrap();
419                 }
420                 _ if arg.starts_with("--error-format=") => {}
421                 _ if arg.starts_with("--emit=") => {}
422                 _ if arg.starts_with("--edition=") => {}
423                 _ if arg.starts_with("--json=") => {}
424                 _ if arg.starts_with("-Aclippy") => {}
425                 _ if arg.starts_with("--allow=clippy") => {}
426                 _ if arg.starts_with("-Wclippy") => {}
427                 _ if arg.starts_with("--warn=clippy") => {}
428                 _ if arg.starts_with("-A=rustdoc") => {}
429                 _ if arg.starts_with("--allow=rustdoc") => {}
430                 _ if arg.starts_with("-D") => {}
431                 _ if arg.starts_with("--deny=") => {}
432                 _ if arg.starts_with("-W") => {}
433                 _ if arg.starts_with("--warn=") => {}
434                 _ if arg.starts_with("--allow=deprecated") => {}
435                 _ if arg.starts_with("--allow=unexpected_cfgs") => {}
436 
437                 arg => bail!("unsupported rustc argument: {arg:?}"),
438             }
439         }
440         out.cfgs.sort();
441         out.cfgs.dedup();
442         out.codegens.sort();
443         out.features.sort();
444 
445         if out.name.is_empty() {
446             bail!("missing --crate-name");
447         }
448         if out.main_src.as_os_str().is_empty() {
449             bail!("missing main source file");
450         }
451         // Must have at least one type.
452         if out.types.is_empty() {
453             if out.cfgs.contains(&"test".to_string()) {
454                 out.types.push(CrateType::TestNoHarness);
455             } else {
456                 bail!("failed to detect crate type. did not have --crate-type or --test or '--cfg test'");
457             }
458         }
459         if out.types.contains(&CrateType::Test) && out.types.len() != 1 {
460             bail!("cannot specify both --test and --crate-type");
461         }
462         if out.types.contains(&CrateType::Lib) && out.types.contains(&CrateType::RLib) {
463             bail!("cannot both have lib and rlib crate types");
464         }
465 
466         // Find the metadata for the crates containing package by matching the manifest's path.
467         let manifest_path = out.package_dir.join("Cargo.toml");
468         let package_metadata = metadata
469             .packages
470             .iter()
471             .find(|p| Path::new(&p.manifest_path).canonicalize().unwrap() == manifest_path)
472             .ok_or_else(|| {
473                 anyhow!(
474                     "can't find metadata for crate {:?} with manifest path {:?}",
475                     out.name,
476                     manifest_path,
477                 )
478             })?;
479         out.package_name.clone_from(&package_metadata.name);
480         out.version = Some(package_metadata.version.clone());
481         out.edition.clone_from(&package_metadata.edition);
482         out.license.clone_from(&package_metadata.license);
483         out.license_file.clone_from(&package_metadata.license_file);
484 
485         let output_filename = out.name.clone() + &extra_filename;
486         if let Some(test_contents) = tests.get(&output_filename).and_then(|m| m.get(&out.main_src))
487         {
488             out.empty_test = !test_contents.tests && !test_contents.benchmarks;
489         }
490 
491         Ok(out)
492     }
493 }
494 
495 /// Given a path to the main source file of some Rust crate, returns the canonical path to the
496 /// package directory, and the relative path to the source file within that directory.
split_src_path(src_path: &Path) -> Result<(PathBuf, PathBuf)>497 fn split_src_path(src_path: &Path) -> Result<(PathBuf, PathBuf)> {
498     // Canonicalize the path because:
499     //
500     // 1. We don't consistently get relative or absolute paths elsewhere. If we
501     //    canonicalize everything, it becomes easy to compare paths.
502     //
503     // 2. We don't want to consider symlinks to code outside the cwd as part of the
504     //    project (e.g. AOSP's import of crosvm has symlinks from crosvm's own 3p
505     //    directory to the android 3p directories).
506     let src_path = src_path
507         .canonicalize()
508         .unwrap_or_else(|e| panic!("failed to canonicalize {src_path:?}: {}", e));
509     let package_dir = find_cargo_toml(&src_path)?;
510     let main_src = src_path.strip_prefix(&package_dir).unwrap().to_path_buf();
511 
512     Ok((package_dir, main_src))
513 }
514 
515 /// Given a path to a Rust source file, finds the closest ancestor directory containing a
516 /// `Cargo.toml` file.
find_cargo_toml(src_path: &Path) -> Result<PathBuf>517 fn find_cargo_toml(src_path: &Path) -> Result<PathBuf> {
518     let mut package_dir = src_path.parent().unwrap();
519     while !package_dir.join("Cargo.toml").try_exists()? {
520         package_dir = package_dir
521             .parent()
522             .ok_or_else(|| anyhow!("No Cargo.toml found in parents of {:?}", src_path))?;
523     }
524     Ok(package_dir.to_path_buf())
525 }
526 
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks whitespace splitting and both kinds of quoting in `args_from_rustc_invocation`.
    #[test]
    fn parse_args() {
        // (command line, expected arguments)
        let cases: [(&str, &[&str]); 4] = [
            ("foo bar", &["foo", "bar"]),
            ("  foo   bar ", &["foo", "bar"]),
            ("'foo' \"bar\"", &["foo", "bar"]),
            ("'fo o' \" b ar\" ' baz '", &["fo o", " b ar", " baz "]),
        ];
        for (command_line, expected) in cases {
            assert_eq!(args_from_rustc_invocation(command_line), expected);
        }
    }
}
542