xref: /aosp_15_r20/external/bazelbuild-rules_rust/crate_universe/src/splicing.rs (revision d4726bddaa87cc4778e7472feed243fa4b6c267f)
1 //! This module is responsible for finding a Cargo workspace
2 
3 pub(crate) mod cargo_config;
4 mod crate_index_lookup;
5 mod splicer;
6 
7 use std::collections::{BTreeMap, BTreeSet};
8 use std::fs;
9 use std::path::{Path, PathBuf};
10 use std::str::FromStr;
11 
12 use anyhow::{anyhow, bail, Context, Result};
13 use cargo_lock::package::SourceKind;
14 use cargo_toml::Manifest;
15 use serde::{Deserialize, Serialize};
16 
17 use crate::config::CrateId;
18 use crate::metadata::{Cargo, CargoUpdateRequest, LockGenerator, TreeResolverMetadata};
19 use crate::utils;
20 use crate::utils::starlark::Label;
21 
22 use self::cargo_config::CargoConfig;
23 use self::crate_index_lookup::CrateIndexLookup;
24 pub(crate) use self::splicer::*;
25 
/// Dependency details for packages declared directly on the rule, keyed by package name.
type DirectPackageManifest = BTreeMap<String, cargo_toml::DependencyDetail>;
27 
/// A collection of information used for splicing together a new Cargo manifest.
///
/// Instances are deserialized from JSON (see the `FromStr` implementation) and
/// unknown fields are rejected. Paths may still contain the
/// `${build_workspace_directory}` and `${output_base}` placeholders until
/// [`SplicingManifest::resolve`] has been applied.
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
#[serde(deny_unknown_fields)]
pub(crate) struct SplicingManifest {
    /// A set of all packages directly written to the rule
    pub(crate) direct_packages: DirectPackageManifest,

    /// A mapping of manifest paths to the labels representing them
    pub(crate) manifests: BTreeMap<PathBuf, Label>,

    /// The path of a Cargo config file, if one was provided
    pub(crate) cargo_config: Option<PathBuf>,

    /// The Cargo resolver version to use for splicing
    pub(crate) resolver_version: cargo_toml::Resolver,
}
44 
45 impl FromStr for SplicingManifest {
46     type Err = serde_json::Error;
47 
from_str(s: &str) -> Result<Self, Self::Err>48     fn from_str(s: &str) -> Result<Self, Self::Err> {
49         serde_json::from_str(s)
50     }
51 }
52 
53 impl SplicingManifest {
try_from_path<T: AsRef<Path>>(path: T) -> Result<Self>54     pub(crate) fn try_from_path<T: AsRef<Path>>(path: T) -> Result<Self> {
55         let content = fs::read_to_string(path.as_ref())?;
56         Self::from_str(&content).context("Failed to load SplicingManifest")
57     }
58 
resolve(self, workspace_dir: &Path, output_base: &Path) -> Self59     pub(crate) fn resolve(self, workspace_dir: &Path, output_base: &Path) -> Self {
60         let Self {
61             manifests,
62             cargo_config,
63             ..
64         } = self;
65 
66         let workspace_dir_str = workspace_dir.to_string_lossy();
67         let output_base_str = output_base.to_string_lossy();
68 
69         // Ensure manifests all have absolute paths
70         let manifests = manifests
71             .into_iter()
72             .map(|(path, label)| {
73                 let resolved_path = path
74                     .to_string_lossy()
75                     .replace("${build_workspace_directory}", &workspace_dir_str)
76                     .replace("${output_base}", &output_base_str);
77                 (PathBuf::from(resolved_path), label)
78             })
79             .collect();
80 
81         // Ensure the cargo config is located at an absolute path
82         let cargo_config = cargo_config.map(|path| {
83             let resolved_path = path
84                 .to_string_lossy()
85                 .replace("${build_workspace_directory}", &workspace_dir_str)
86                 .replace("${output_base}", &output_base_str);
87             PathBuf::from(resolved_path)
88         });
89 
90         Self {
91             manifests,
92             cargo_config,
93             ..self
94         }
95     }
96 }
97 
/// The result of fully resolving a [SplicingManifest] in preparation for splicing.
///
/// Unlike [SplicingManifest], which stores paths, this holds the parsed content of
/// the referenced files.
#[derive(Debug, Serialize, Default)]
pub(crate) struct SplicingMetadata {
    /// A set of all packages directly written to the rule
    pub(crate) direct_packages: DirectPackageManifest,

    /// A mapping of labels to the parsed Cargo manifests they represent
    pub(crate) manifests: BTreeMap<Label, cargo_toml::Manifest>,

    /// The parsed content of the Cargo config file, if one was provided
    pub(crate) cargo_config: Option<CargoConfig>,
}
110 
111 impl TryFrom<SplicingManifest> for SplicingMetadata {
112     type Error = anyhow::Error;
113 
try_from(value: SplicingManifest) -> Result<Self, Self::Error>114     fn try_from(value: SplicingManifest) -> Result<Self, Self::Error> {
115         let direct_packages = value.direct_packages;
116 
117         let manifests = value
118             .manifests
119             .into_iter()
120             .map(|(path, label)| {
121                 // We read the content of a manifest file to buffer and use `from_slice` to
122                 // parse it. The reason is that the `from_path` version will resolve indirect
123                 // path dependencies in the workspace to absolute path, which causes the hash
124                 // to be unstable. Not resolving implicit data is okay here because the
125                 // workspace manifest is also included in the hash.
126                 // See https://github.com/bazelbuild/rules_rust/issues/2016
127                 let manifest_content = fs::read(&path)
128                     .with_context(|| format!("Failed to load manifest '{}'", path.display()))?;
129                 let manifest = cargo_toml::Manifest::from_slice(&manifest_content)
130                     .with_context(|| format!("Failed to parse manifest '{}'", path.display()))?;
131                 Ok((label, manifest))
132             })
133             .collect::<Result<BTreeMap<Label, Manifest>>>()?;
134 
135         let cargo_config = match value.cargo_config {
136             Some(path) => Some(
137                 CargoConfig::try_from_path(&path)
138                     .with_context(|| format!("Failed to load cargo config '{}'", path.display()))?,
139             ),
140             None => None,
141         };
142 
143         Ok(Self {
144             direct_packages,
145             manifests,
146             cargo_config,
147         })
148     }
149 }
150 
/// Download information for a single crate.
#[derive(Debug, Default, Serialize, Deserialize, Clone)]
pub(crate) struct SourceInfo {
    /// The url of the `.crate` file.
    pub(crate) url: String,

    /// The `.crate` file's sha256 checksum.
    pub(crate) sha256: String,
}
159 
/// Information about the Cargo workspace relative to the Bazel workspace
///
/// This is stored under the `cargo-bazel` key of the workspace metadata table of
/// a Cargo manifest (see `inject_into` and the `TryFrom` implementations).
#[derive(Debug, Default, Serialize, Deserialize)]
pub(crate) struct WorkspaceMetadata {
    /// A mapping of crates to information about where their source can be downloaded
    pub(crate) sources: BTreeMap<CrateId, SourceInfo>,

    /// The path from the root of a Bazel workspace to the root of the Cargo workspace
    pub(crate) workspace_prefix: Option<String>,

    /// Paths from the root of a Bazel workspace to a Cargo package, keyed by the
    /// Cargo package name
    pub(crate) package_prefixes: BTreeMap<String, String>,

    /// Feature set for each target triplet and crate.
    ///
    /// We store this here because it's computed during the splicing phase via
    /// calls to "cargo tree" which need the full spliced workspace.
    pub(crate) tree_metadata: TreeResolverMetadata,
}
178 
179 impl TryFrom<toml::Value> for WorkspaceMetadata {
180     type Error = anyhow::Error;
181 
try_from(value: toml::Value) -> Result<Self, Self::Error>182     fn try_from(value: toml::Value) -> Result<Self, Self::Error> {
183         match value.get("cargo-bazel") {
184             Some(v) => v
185                 .to_owned()
186                 .try_into()
187                 .context("Failed to deserialize toml value"),
188             None => bail!("cargo-bazel workspace metadata not found"),
189         }
190     }
191 }
192 
193 impl TryFrom<serde_json::Value> for WorkspaceMetadata {
194     type Error = anyhow::Error;
195 
try_from(value: serde_json::Value) -> Result<Self, Self::Error>196     fn try_from(value: serde_json::Value) -> Result<Self, Self::Error> {
197         match value.get("cargo-bazel") {
198             Some(value) => {
199                 serde_json::from_value(value.to_owned()).context("Failed to deserialize json value")
200             }
201             None => bail!("cargo-bazel workspace metadata not found"),
202         }
203     }
204 }
205 
206 impl WorkspaceMetadata {
new( splicing_manifest: &SplicingManifest, member_manifests: BTreeMap<&PathBuf, String>, ) -> Result<Self>207     fn new(
208         splicing_manifest: &SplicingManifest,
209         member_manifests: BTreeMap<&PathBuf, String>,
210     ) -> Result<Self> {
211         let mut package_prefixes: BTreeMap<String, String> = member_manifests
212             .iter()
213             .filter_map(|(original_manifest, cargo_package_name)| {
214                 let label = match splicing_manifest.manifests.get(*original_manifest) {
215                     Some(v) => v,
216                     None => return None,
217                 };
218 
219                 let package = match label.package() {
220                     Some(package) if !package.is_empty() => PathBuf::from(package),
221                     Some(_) | None => return None,
222                 };
223 
224                 let prefix = package.to_string_lossy().to_string();
225 
226                 Some((cargo_package_name.clone(), prefix))
227             })
228             .collect();
229 
230         // It is invald for toml maps to use empty strings as keys. In the case
231         // the empty key is expected to be the root package. If the root package
232         // has a prefix, then all other packages will as well (even if no other
233         // manifest represents them). The value is then saved as a separate value
234         let workspace_prefix = package_prefixes.remove("");
235 
236         let package_prefixes = package_prefixes
237             .into_iter()
238             .map(|(k, v)| {
239                 let prefix_path = PathBuf::from(v);
240                 let prefix = prefix_path.parent().unwrap();
241                 (k, prefix.to_string_lossy().to_string())
242             })
243             .collect();
244 
245         Ok(Self {
246             sources: BTreeMap::new(),
247             workspace_prefix,
248             package_prefixes,
249             tree_metadata: TreeResolverMetadata::new(),
250         })
251     }
252 
253     /// Update an existing Cargo manifest with metadata about registry urls and target
254     /// features that are needed in generator steps beyond splicing.
255     #[tracing::instrument(skip_all)]
write_registry_urls_and_feature_map( cargo: &Cargo, lockfile: &cargo_lock::Lockfile, resolver_data: TreeResolverMetadata, input_manifest_path: &Path, output_manifest_path: &Path, ) -> Result<()>256     pub(crate) fn write_registry_urls_and_feature_map(
257         cargo: &Cargo,
258         lockfile: &cargo_lock::Lockfile,
259         resolver_data: TreeResolverMetadata,
260         input_manifest_path: &Path,
261         output_manifest_path: &Path,
262     ) -> Result<()> {
263         let mut manifest = read_manifest(input_manifest_path)?;
264 
265         let mut workspace_metaata = WorkspaceMetadata::try_from(
266             manifest
267                 .workspace
268                 .as_ref()
269                 .unwrap()
270                 .metadata
271                 .as_ref()
272                 .unwrap()
273                 .clone(),
274         )?;
275 
276         // Locate all packages sourced from a registry
277         let pkg_sources: Vec<&cargo_lock::Package> = lockfile
278             .packages
279             .iter()
280             .filter(|pkg| pkg.source.is_some())
281             .filter(|pkg| pkg.source.as_ref().unwrap().is_registry())
282             .collect();
283 
284         // Collect a unique set of index urls
285         let index_urls: BTreeSet<(SourceKind, String)> = pkg_sources
286             .iter()
287             .map(|pkg| {
288                 let source = pkg.source.as_ref().unwrap();
289                 (source.kind().clone(), source.url().to_string())
290             })
291             .collect();
292 
293         // Load the cargo config
294         let cargo_config = {
295             // Note that this path must match the one defined in `splicing::setup_cargo_config`
296             let config_path = input_manifest_path
297                 .parent()
298                 .unwrap()
299                 .join(".cargo")
300                 .join("config.toml");
301 
302             if config_path.exists() {
303                 Some(CargoConfig::try_from_path(&config_path)?)
304             } else {
305                 None
306             }
307         };
308 
309         // Load each index for easy access
310         let crate_indexes = index_urls
311             .into_iter()
312             .map(|(source_kind, url)| {
313                 // Ensure the correct registry is mapped based on the give Cargo config.
314                 let index_url = if let Some(config) = &cargo_config {
315                     config.resolve_replacement_url(&url)?
316                 } else {
317                     &url
318                 };
319                 let index = if cargo.use_sparse_registries_for_crates_io()?
320                     && index_url == utils::CRATES_IO_INDEX_URL
321                 {
322                     CrateIndexLookup::Http(crates_index::SparseIndex::from_url(
323                         "sparse+https://index.crates.io/",
324                     )?)
325                 } else if index_url.starts_with("sparse+") {
326                     CrateIndexLookup::Http(crates_index::SparseIndex::from_url(index_url)?)
327                 } else {
328                     match source_kind {
329                         SourceKind::Registry => {
330                             let index = {
331                                 // Load the index for the current url
332                                 let index = crates_index::GitIndex::from_url(index_url)
333                                     .with_context(|| {
334                                         format!("Failed to load index for url: {index_url}")
335                                     })?;
336 
337                                 // Ensure each index has a valid index config
338                                 index.index_config().with_context(|| {
339                                     format!("`config.json` not found in index: {index_url}")
340                                 })?;
341 
342                                 index
343                             };
344                             CrateIndexLookup::Git(index)
345                         }
346                         SourceKind::SparseRegistry => {
347                             CrateIndexLookup::Http(crates_index::SparseIndex::from_url(
348                                 format!("sparse+{}", index_url).as_str(),
349                             )?)
350                         }
351                         unknown => {
352                             return Err(anyhow!(
353                                 "'{:?}' crate index type is not supported (caused by '{}')",
354                                 &unknown,
355                                 url
356                             ));
357                         }
358                     }
359                 };
360                 Ok((url, index))
361             })
362             .collect::<Result<BTreeMap<String, _>>>()
363             .context("Failed to locate crate indexes")?;
364 
365         // Get the download URL of each package based on it's registry url.
366         let additional_sources = pkg_sources
367             .iter()
368             .map(|pkg| {
369                 let source_id = pkg.source.as_ref().unwrap();
370                 let source_url = source_id.url().to_string();
371                 let lookup = crate_indexes.get(&source_url).ok_or_else(|| {
372                     anyhow!(
373                         "Couldn't find crate_index data for SourceID {:?}",
374                         source_id
375                     )
376                 })?;
377                 lookup.get_source_info(pkg).map(|source_info| {
378                     (
379                         CrateId::new(pkg.name.as_str().to_owned(), pkg.version.clone()),
380                         source_info,
381                     )
382                 })
383             })
384             .collect::<Result<Vec<_>>>()?;
385 
386         workspace_metaata
387             .sources
388             .extend(
389                 additional_sources
390                     .into_iter()
391                     .filter_map(|(crate_id, source_info)| {
392                         source_info.map(|source_info| (crate_id, source_info))
393                     }),
394             );
395         workspace_metaata.tree_metadata = resolver_data;
396         workspace_metaata.inject_into(&mut manifest)?;
397 
398         write_root_manifest(output_manifest_path, manifest)?;
399 
400         Ok(())
401     }
402 
inject_into(&self, manifest: &mut Manifest) -> Result<()>403     fn inject_into(&self, manifest: &mut Manifest) -> Result<()> {
404         let metadata_value = toml::Value::try_from(self)?;
405         let workspace = manifest.workspace.as_mut().unwrap();
406 
407         match &mut workspace.metadata {
408             Some(data) => match data.as_table_mut() {
409                 Some(map) => {
410                     map.insert("cargo-bazel".to_owned(), metadata_value);
411                 }
412                 None => bail!("The metadata field is always expected to be a table"),
413             },
414             None => {
415                 let mut table = toml::map::Map::new();
416                 table.insert("cargo-bazel".to_owned(), metadata_value);
417                 workspace.metadata = Some(toml::Value::Table(table))
418             }
419         }
420 
421         Ok(())
422     }
423 }
424 
/// The path of a manifest produced by splicing, tagged with a variant name.
///
/// Every variant wraps the path of the manifest file; use
/// [`SplicedManifest::as_path_buf`] to access it regardless of the variant.
// NOTE(review): the variants presumably describe the kind of input that was
// spliced (a workspace, a single package, several packages) — confirm against
// the splicer implementation.
#[derive(Debug)]
pub(crate) enum SplicedManifest {
    Workspace(PathBuf),
    Package(PathBuf),
    MultiPackage(PathBuf),
}
431 
432 impl SplicedManifest {
as_path_buf(&self) -> &PathBuf433     pub(crate) fn as_path_buf(&self) -> &PathBuf {
434         match self {
435             SplicedManifest::Workspace(p) => p,
436             SplicedManifest::Package(p) => p,
437             SplicedManifest::MultiPackage(p) => p,
438         }
439     }
440 }
441 
read_manifest(manifest: &Path) -> Result<Manifest>442 pub(crate) fn read_manifest(manifest: &Path) -> Result<Manifest> {
443     let content = fs::read_to_string(manifest)?;
444     cargo_toml::Manifest::from_str(content.as_str()).context("Failed to deserialize manifest")
445 }
446 
generate_lockfile( manifest_path: &SplicedManifest, existing_lock: &Option<PathBuf>, cargo_bin: Cargo, update_request: &Option<CargoUpdateRequest>, ) -> Result<cargo_lock::Lockfile>447 pub(crate) fn generate_lockfile(
448     manifest_path: &SplicedManifest,
449     existing_lock: &Option<PathBuf>,
450     cargo_bin: Cargo,
451     update_request: &Option<CargoUpdateRequest>,
452 ) -> Result<cargo_lock::Lockfile> {
453     let manifest_dir = manifest_path
454         .as_path_buf()
455         .parent()
456         .expect("Every manifest should be contained in a parent directory");
457 
458     let root_lockfile_path = manifest_dir.join("Cargo.lock");
459 
460     // Remove the file so it's not overwitten if it happens to be a symlink.
461     if root_lockfile_path.exists() {
462         fs::remove_file(&root_lockfile_path)?;
463     }
464 
465     // Generate the new lockfile
466     let lockfile = LockGenerator::new(cargo_bin).generate(
467         manifest_path.as_path_buf(),
468         existing_lock,
469         update_request,
470     )?;
471 
472     // Write the lockfile to disk
473     if !root_lockfile_path.exists() {
474         bail!("Failed to generate Cargo.lock file")
475     }
476 
477     Ok(lockfile)
478 }
479 
480 #[cfg(test)]
481 mod test {
482     use super::*;
483 
484     #[test]
deserialize_splicing_manifest()485     fn deserialize_splicing_manifest() {
486         let runfiles = runfiles::Runfiles::create().unwrap();
487         let path = runfiles::rlocation!(
488             runfiles,
489             "rules_rust/crate_universe/test_data/serialized_configs/splicing_manifest.json"
490         );
491 
492         let content = std::fs::read_to_string(path).unwrap();
493 
494         let manifest: SplicingManifest = serde_json::from_str(&content).unwrap();
495 
496         // Check manifests
497         assert_eq!(
498             manifest.manifests,
499             BTreeMap::from([
500                 (
501                     PathBuf::from("${build_workspace_directory}/submod/Cargo.toml"),
502                     Label::from_str("//submod:Cargo.toml").unwrap()
503                 ),
504                 (
505                     PathBuf::from("${output_base}/external_crate/Cargo.toml"),
506                     Label::from_str("@external_crate//:Cargo.toml").unwrap()
507                 ),
508                 (
509                     PathBuf::from("/tmp/abs/path/workspace/Cargo.toml"),
510                     Label::from_str("//:Cargo.toml").unwrap()
511                 ),
512             ])
513         );
514 
515         // Check splicing configs
516         assert_eq!(manifest.resolver_version, cargo_toml::Resolver::V2);
517 
518         // Check packages
519         assert_eq!(manifest.direct_packages.len(), 4);
520         let package = manifest.direct_packages.get("rand").unwrap();
521         assert_eq!(
522             package,
523             &cargo_toml::DependencyDetail {
524                 default_features: false,
525                 features: vec!["small_rng".to_owned()],
526                 version: Some("0.8.5".to_owned()),
527                 ..Default::default()
528             }
529         );
530         let package = manifest.direct_packages.get("cfg-if").unwrap();
531         assert_eq!(
532             package,
533             &cargo_toml::DependencyDetail {
534                 git: Some("https://github.com/rust-lang/cfg-if.git".to_owned()),
535                 rev: Some("b9c2246a".to_owned()),
536                 default_features: true,
537                 ..Default::default()
538             }
539         );
540         let package = manifest.direct_packages.get("log").unwrap();
541         assert_eq!(
542             package,
543             &cargo_toml::DependencyDetail {
544                 git: Some("https://github.com/rust-lang/log.git".to_owned()),
545                 branch: Some("master".to_owned()),
546                 default_features: true,
547                 ..Default::default()
548             }
549         );
550         let package = manifest.direct_packages.get("cargo_toml").unwrap();
551         assert_eq!(
552             package,
553             &cargo_toml::DependencyDetail {
554                 git: Some("https://gitlab.com/crates.rs/cargo_toml.git".to_owned()),
555                 tag: Some("v0.15.2".to_owned()),
556                 default_features: true,
557                 ..Default::default()
558             }
559         );
560 
561         // Check cargo config
562         assert_eq!(
563             manifest.cargo_config,
564             Some(PathBuf::from("/tmp/abs/path/workspace/.cargo/config.toml"))
565         );
566     }
567 
568     #[test]
splicing_manifest_resolve()569     fn splicing_manifest_resolve() {
570         let runfiles = runfiles::Runfiles::create().unwrap();
571         let path = runfiles::rlocation!(
572             runfiles,
573             "rules_rust/crate_universe/test_data/serialized_configs/splicing_manifest.json"
574         );
575 
576         let content = std::fs::read_to_string(path).unwrap();
577 
578         let mut manifest: SplicingManifest = serde_json::from_str(&content).unwrap();
579         manifest.cargo_config = Some(PathBuf::from(
580             "${build_workspace_directory}/.cargo/config.toml",
581         ));
582         manifest = manifest.resolve(
583             &PathBuf::from("/tmp/abs/path/workspace"),
584             &PathBuf::from("/tmp/output_base"),
585         );
586 
587         // Check manifests
588         assert_eq!(
589             manifest.manifests,
590             BTreeMap::from([
591                 (
592                     PathBuf::from("/tmp/abs/path/workspace/submod/Cargo.toml"),
593                     Label::from_str("//submod:Cargo.toml").unwrap()
594                 ),
595                 (
596                     PathBuf::from("/tmp/output_base/external_crate/Cargo.toml"),
597                     Label::from_str("@external_crate//:Cargo.toml").unwrap()
598                 ),
599                 (
600                     PathBuf::from("/tmp/abs/path/workspace/Cargo.toml"),
601                     Label::from_str("//:Cargo.toml").unwrap()
602                 ),
603             ])
604         );
605 
606         // Check cargo config
607         assert_eq!(
608             manifest.cargo_config.unwrap(),
609             PathBuf::from("/tmp/abs/path/workspace/.cargo/config.toml"),
610         )
611     }
612 
613     #[test]
splicing_metadata_workspace_path()614     fn splicing_metadata_workspace_path() {
615         let runfiles = runfiles::Runfiles::create().unwrap();
616         let workspace_manifest_path = runfiles::rlocation!(
617             runfiles,
618             "rules_rust/crate_universe/test_data/metadata/workspace_path/Cargo.toml"
619         );
620         let workspace_path = workspace_manifest_path.parent().unwrap().to_path_buf();
621         let child_a_manifest_path = runfiles::rlocation!(
622             runfiles,
623             "rules_rust/crate_universe/test_data/metadata/workspace_path/child_a/Cargo.toml"
624         );
625         let child_b_manifest_path = runfiles::rlocation!(
626             runfiles,
627             "rules_rust/crate_universe/test_data/metadata/workspace_path/child_b/Cargo.toml"
628         );
629         let manifest = SplicingManifest {
630             direct_packages: BTreeMap::new(),
631             manifests: BTreeMap::from([
632                 (
633                     workspace_manifest_path,
634                     Label::from_str("//:Cargo.toml").unwrap(),
635                 ),
636                 (
637                     child_a_manifest_path,
638                     Label::from_str("//child_a:Cargo.toml").unwrap(),
639                 ),
640                 (
641                     child_b_manifest_path,
642                     Label::from_str("//child_b:Cargo.toml").unwrap(),
643                 ),
644             ]),
645             cargo_config: None,
646             resolver_version: cargo_toml::Resolver::V2,
647         };
648         let metadata = SplicingMetadata::try_from(manifest).unwrap();
649         let metadata = serde_json::to_string(&metadata).unwrap();
650         assert!(
651             !metadata.contains(workspace_path.to_str().unwrap()),
652             "serialized metadata should not contain absolute path"
653         );
654     }
655 }
656