use std::cmp::min; use std::error::Error; use std::path::Path; use std::path::PathBuf; use crate::package_info::PackageInfo; use std::process::Command; use log::debug; use regex::Regex; use crate::ProgressCallback; fn clone_repo( url: &str, package: &str, branch: Option<&str>, cwd: Option<&Path>, progress: ProgressCallback<'_>, ) -> Result<(), Box> { let target_path = if let Some(path) = cwd { path.join(package) } else { Path::new(package).to_path_buf() }; let mut callbacks = git2::RemoteCallbacks::new(); if let Some(ref progress_cb) = progress { // Download progress callbacks.transfer_progress(move |stats| { (progress_cb)( "", "Receiving objects...", stats.received_objects(), stats.total_objects(), ); true }); // Remote progress: messages 'Remote: compressing objects 10% (34/340)' // Parse progress informations to display them in callbacks callbacks.sideband_progress(move |data| { let msg = String::from_utf8_lossy(data); let re = Regex::new(r"(.*):[ ]*([0-9]*)% \(([0-9]*)/([0-9]*)\)").unwrap(); if let Some(caps) = re.captures(msg.trim()) { let msg = caps.get(1).map_or("", |m| m.as_str()).to_string(); let objects = caps .get(3) .map_or("", |m| m.as_str()) .to_string() .parse::() .unwrap_or(0); let total = caps .get(4) .map_or("", |m| m.as_str()) .to_string() .parse::() .unwrap_or(0); (progress_cb)("", msg.as_str(), objects, total); } true }); } let mut fetch_options = git2::FetchOptions::new(); fetch_options.remote_callbacks(callbacks); let mut builder = git2::build::RepoBuilder::new(); builder.fetch_options(fetch_options); if let Some(b) = branch { builder.branch(b); } match builder.clone(url, &target_path) { Ok(_repo) => Ok(()), Err(e) => Err(format!("Failed to clone: {}", e).into()), } } use sha2::{Digest, Sha256}; use std::fs::File; use std::io::Write; use flate2::read::GzDecoder; use futures_util::StreamExt; use tar::Archive; use xz2::read::XzDecoder; fn copy_dir_all(src: &Path, dst: &Path) -> Result<(), Box> { if !dst.exists() { std::fs::create_dir_all(dst)?; } for entry in std::fs::read_dir(src)? { let entry = entry?; let src_path = entry.path(); let dst_path = dst.join(entry.file_name()); if src_path.is_dir() { copy_dir_all(&src_path, &dst_path)?; } else { std::fs::copy(&src_path, &dst_path)?; } } Ok(()) } /// Helper function to extract tar archive with progress tracking fn extract_tar_archive( file_path: &Path, dest: &Path, progress: ProgressCallback<'_>, decoder_factory: F, ) -> Result, Box> where D: std::io::Read, F: Fn(File) -> D, { let file = File::open(file_path)?; let decoder = decoder_factory(file); let mut archive = Archive::new(decoder); // Get total number of entries for progress tracking let total_entries = archive.entries()?.count(); let mut current_entry = 0; // Reset the archive to read entries again let file = File::open(file_path)?; let decoder = decoder_factory(file); let mut archive = Archive::new(decoder); let mut extracted_files = Vec::new(); for entry in archive.entries()? { let mut entry = entry?; let path = entry.path()?.to_path_buf(); let dest_path = dest.join(&path); // Create parent directories if needed if let Some(parent) = dest_path.parent() { std::fs::create_dir_all(parent)?; } // Extract the file entry.unpack(&dest_path)?; extracted_files.push(dest_path.to_string_lossy().to_string()); current_entry += 1; // Report progress if let Some(cb) = progress { cb("", "Extracting...", current_entry, total_entries); } } Ok(extracted_files) } fn extract_archive( path: &Path, dest: &Path, progress: ProgressCallback<'_>, ) -> Result, Box> { let filename = path.file_name().unwrap().to_string_lossy(); if filename.ends_with(".tar.gz") || filename.ends_with(".tgz") { extract_tar_archive(path, dest, progress, GzDecoder::new) } else if filename.ends_with(".tar.xz") || filename.ends_with(".txz") { extract_tar_archive(path, dest, progress, XzDecoder::new) } else { Err(format!("Unsupported archive format: {}", filename).into()) } } fn checkout_pristine_tar(package_dir: &Path, filename: &str) -> Result<(), Box> { let output = Command::new("pristine-tar") .current_dir(package_dir) .args(["checkout", format!("../{filename}").as_str()]) .output() .expect("pristine-tar checkout failed"); if !output.status.success() { return Err(format!( "pristine-tar checkout failed with status: {}", output.status ) .into()); } Ok(()) } async fn download_file_checksum( url: &str, checksum: &str, target_dir: &Path, progress: ProgressCallback<'_>, ) -> Result<(), Box> { // Download with reqwest let response = reqwest::get(url).await?; if !response.status().is_success() { return Err(format!("Failed to download '{}' : {}", &url, response.status()).into()); } let total_size = response .content_length() .ok_or(format!("Failed to get content length from '{}'", &url))?; let mut index = 0; // Target file: extract file name from URL let filename = Path::new(url).file_name().unwrap().to_str().unwrap(); let path = target_dir.join(filename); let mut file = File::create(path)?; // Download chunk by chunk to disk, while updating hasher for checksum let mut stream = response.bytes_stream(); let mut hasher = Sha256::new(); while let Some(item) = stream.next().await { let chunk = item?; file.write_all(&chunk)?; hasher.update(&chunk); if let Some(cb) = progress { index = min(index + chunk.len(), total_size as usize); cb("", "Downloading...", index, total_size as usize); } } // Verify checksum let result = hasher.finalize(); let calculated_checksum = hex::encode(result); if calculated_checksum != checksum { return Err(format!( "Checksum mismatch! Expected {}, got {}", checksum, calculated_checksum ) .into()); } Ok(()) } fn setup_pristine_tar_branch(package_dir: &Path, dist: &str) -> Result<(), Box> { let repo = git2::Repository::open(package_dir)?; // Check if local branch already exists if repo .find_branch("pristine-tar", git2::BranchType::Local) .is_ok() { return Ok(()); } // Find remote pristine-tar branch let branches = repo.branches(Some(git2::BranchType::Remote))?; for branch_result in branches { let (branch, _) = branch_result?; if let Some(name) = branch.name()? && name.ends_with(&format!("/{dist}/pristine-tar")) { debug!("Found remote pristine-tar branch: {}", name); let commit = branch.get().peel_to_commit()?; // Create local branch let mut local_branch = repo.branch("pristine-tar", &commit, false)?; // Set upstream local_branch.set_upstream(Some(name))?; debug!("Created local pristine-tar branch tracking {}", name); return Ok(()); } } debug!("No remote pristine-tar branch found."); Ok(()) } async fn fetch_orig_tarball( info: &PackageInfo, cwd: Option<&Path>, progress: ProgressCallback<'_>, ) -> Result<(), Box> { let package_dir = if let Some(path) = cwd { path.join(&info.stanza.package) } else { Path::new(&info.stanza.package).to_path_buf() }; // Find the orig tarball in the file list // Usually ends with .orig.tar.gz or .orig.tar.xz let orig_file = info .stanza .files .iter() .find(|f| f.name.contains(".orig.tar.")) .unwrap(); let filename = &orig_file.name; // 1. Try executing pristine-tar // Setup pristine-tar branch if needed (by tracking remote branch) let _ = setup_pristine_tar_branch(&package_dir, info.dist.as_str()); if let Err(e) = checkout_pristine_tar(&package_dir, filename.as_str()) { debug!( "pristine-tar failed: {}. Falling back to archive download.", e ); // 2. Fallback to archive download // We download to the parent directory of the package repo (which is standard for build tools) // or the current directory if cwd is None (which effectively is the parent of the package dir) let target_dir = cwd.unwrap_or_else(|| Path::new(".")); download_file_checksum( format!("{}/{}", &info.archive_url, filename).as_str(), &orig_file.sha256, target_dir, progress, ) .await?; } Ok(()) } async fn fetch_dsc_file( info: &PackageInfo, cwd: Option<&Path>, progress: ProgressCallback<'_>, ) -> Result<(), Box> { let target_dir = cwd.unwrap_or_else(|| Path::new(".")); // Find the dsc file in the file list let dsc_file = info .stanza .files .iter() .find(|f| f.name.ends_with(".dsc")) .ok_or("Could not find .dsc file in package info")?; let filename = &dsc_file.name; debug!("Fetching dsc file: {}", filename); download_file_checksum( format!("{}/{}", &info.archive_url, filename).as_str(), &dsc_file.sha256, target_dir, progress, ) .await?; Ok(()) } async fn fetch_archive_sources( info: &PackageInfo, cwd: Option<&Path>, progress: ProgressCallback<'_>, ) -> Result<(), Box> { let package_dir = if let Some(path) = cwd { path } else { &Path::new(".").to_path_buf() }; std::fs::create_dir_all(package_dir)?; for file in &info.stanza.files { let url = format!("{}/{}", info.archive_url, file.name); download_file_checksum(&url, &file.sha256, package_dir, progress).await?; // Extract all tar archives, merging extracted directories if file.name.ends_with(".tar.gz") || file.name.ends_with(".tar.xz") { let path = package_dir.join(&file.name); let extract_dir = package_dir.join(&info.stanza.package); let extracted = extract_archive(&path, &extract_dir, progress)?; // Special case: the debian tar does only contain 'debian' if file.name.contains("debian.tar.") { continue; } // List root directories extracted and use the first one as the source directory debug!("Root directories extracted:"); let mut source_dir: Option = None; for file in &extracted { let path = Path::new(file); // Check if this is a directory and is at the archive root level // (i.e., the path relative to extract_dir has no parent components) if let Ok(relative_path) = path.strip_prefix(&extract_dir) && relative_path.components().count() == 1 && path.is_dir() { debug!("- {}", relative_path.file_name().unwrap().to_string_lossy()); // Use the first directory found as the source if source_dir.is_none() { source_dir = Some(path.to_path_buf()); } } } // Use the extracted directory as the source, assuming there is only one if let Some(src_dir) = source_dir { let target_dir = package_dir.join(&info.stanza.package); if target_dir.exists() { // Target exists, we need to merge contents for sub_entry in std::fs::read_dir(&src_dir)? { let sub_entry = sub_entry?; let sub_path = sub_entry.path(); let target_path = target_dir.join(sub_entry.file_name()); if sub_path.is_dir() { std::fs::create_dir_all(&target_path)?; // Recursively copy directory contents copy_dir_all(&sub_path, &target_path)?; } else { std::fs::copy(&sub_path, &target_path)?; } } std::fs::remove_dir_all(&src_dir)?; } else { std::fs::rename(&src_dir, &target_dir)?; } } } // Extract and apply .diff.gz if present (old packages) if file.name.ends_with(".diff.gz") { let diff_gz_path = package_dir.join(&file.name); let source_dir = package_dir.join(&info.stanza.package); // Create the .diff file path by replacing .gz with empty string let diff_path = diff_gz_path.with_extension(""); // Decompress the .diff.gz file directly to .diff let input_file = File::open(&diff_gz_path)?; let mut decoder = GzDecoder::new(input_file); let mut output_file = File::create(&diff_path)?; std::io::copy(&mut decoder, &mut output_file)?; // Use relative path for the diff file (it's in the parent directory) let relative_diff_path = format!("../{}", diff_path.file_name().unwrap().to_string_lossy()); // Apply the patch using the patch command with relative path let output = Command::new("patch") .current_dir(&source_dir) .arg("-p1") .arg("--input") .arg(&relative_diff_path) .output()?; if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr); return Err( format!("Failed to apply patch: {}\n{}", diff_path.display(), stderr).into(), ); } debug!("Successfully applied patch: {}", diff_path.display()); // Clean up the extracted .diff file std::fs::remove_file(&diff_path)?; } } Ok(()) } /// Pull a source package locally using pre-retrieved package information /// /// This function takes a PackageInfo struct and downloads the package using the preferred method /// (either git or direct archive download), as well as orig tarball, inside 'package' directory. /// The source will be extracted under 'package/package'. pub async fn pull( package_info: &PackageInfo, cwd: Option<&Path>, progress: ProgressCallback<'_>, force_archive: bool, ) -> Result<(), Box> { let package = &package_info.stanza.package; let series = &package_info.series; let package_dir = if let Some(path) = cwd { path.join(package) } else { Path::new(package).to_path_buf() }; /* Fetch the package: either via git (preferred VCS) or the archive */ if let Some(ref url) = package_info.preferred_vcs && !force_archive { // We have found a preferred VCS (git repository) for the package, so // we fetch the package from that repo. // Depending on target series, we pick target branch; if latest series is specified, // we target the development branch, i.e. the default branch let branch_name = if crate::distro_info::get_ordered_series(package_info.dist.as_str()) .await?[0] != *series { if package_info.dist == "ubuntu" { Some(format!("{}/{}", package_info.dist, series)) } else { // Debian does not have reliable branch naming... // For now, we skip that part and clone default // TODO: Inspect remote branches and tags for matches None } } else { None }; if let Some(cb) = progress { cb( &format!( "Cloning {}{}...", url, if let Some(b) = &branch_name { format!(" (branch {})", b) } else { String::new() } ), "", 0, 0, ); } clone_repo( url.as_str(), package, branch_name.as_deref(), Some(&package_dir), progress, )?; if !package_info.is_native() { if let Some(cb) = progress { cb("Fetching orig tarball...", "", 0, 0); } fetch_orig_tarball(package_info, Some(&package_dir), progress).await?; } else { debug!("Native package, skipping orig tarball fetch."); } if let Some(cb) = progress { cb("Fetching dsc file...", "", 0, 0); } fetch_dsc_file(package_info, Some(&package_dir), progress).await?; } else { // Fallback to archive fetching if let Some(cb) = progress { cb("Downloading from archive...", "", 0, 0); } fetch_archive_sources(package_info, Some(&package_dir), progress).await?; } Ok(()) } #[cfg(test)] mod tests { use super::*; async fn test_pull_package_end_to_end( package: &str, series: Option<&str>, dist: Option<&str>, archive: Option, ) { // This test verifies that 'pkh pull' clones the repo and fetches the tarball. // For determinism, we require for tests that either a distro or series is specified, // as no distribution would mean fallback to system distro assert!(dist.is_some() || series.is_some()); // Use a temp directory as working directory let temp_dir = tempfile::tempdir().unwrap(); let cwd = temp_dir.path(); // Main 'pull' command: the one we want to test let info = crate::package_info::lookup(package, None, series, "", dist, None) .await .unwrap(); pull(&info, Some(cwd), None, archive.unwrap_or(false)) .await .unwrap(); let package_dir = cwd.join(package); assert!(package_dir.exists()); let package_source_dir = package_dir.join(package); assert!( package_source_dir.exists(), "Package git repo directory not created" ); assert!( package_source_dir.join("debian").exists(), "debian directory not present" ); if package_source_dir.join(".git").exists() { // Verify we are on the correct branch let repo = git2::Repository::open(&package_source_dir).unwrap(); let head = repo.head().unwrap(); let name = head.name().unwrap(); if let Some(s) = series { // The local branch should be named dist/series // We skip debian for now as it does not have a reliable naming scheme if info.dist == "ubuntu" { assert_eq!(name, format!("refs/heads/{0}/{s}", info.dist)); } } else { // The local branch should be named ubuntu/devel for Ubuntu // Debian unfortunately does not have a reliable naming scheme // Given that there was no series specified, and this is a test, // we require to have a distribution specified if dist.unwrap() == "ubuntu" { assert_eq!(name, "refs/heads/ubuntu/devel"); } } } // Check for orig tarball in package dir (only for non-native packages) let mut found_tarball = false; let mut found_dsc = false; for entry in std::fs::read_dir(package_dir).unwrap() { let entry = entry.unwrap(); let name = entry.file_name().to_string_lossy().to_string(); if name.contains(".orig.tar.") { found_tarball = true; } if name.ends_with(".dsc") { found_dsc = true; } } // Only check for orig tarball if the package is not native if !info.is_native() { assert!(found_tarball, "Orig tarball not found in package dir"); } assert!(found_dsc, "DSC file not found in package dir"); } #[tokio::test] async fn test_pull_hello_ubuntu_end_to_end() { test_pull_package_end_to_end("hello", Some("noble"), None, None).await; } #[tokio::test] async fn test_pull_hello_debian_end_to_end() { test_pull_package_end_to_end("hello", Some("bookworm"), None, None).await; } /// Specific test for a package using a .diff.gz, instead of .debian and .orig #[tokio::test] async fn test_pull_linux_riscv_ubuntu_end_to_end() { test_pull_package_end_to_end("linux-riscv", Some("noble"), None, Some(true)).await; } #[tokio::test] async fn test_pull_2048_universe_ubuntu_end_to_end() { test_pull_package_end_to_end("2048", Some("noble"), None, None).await; } #[tokio::test] async fn test_pull_1oom_contrib_debian_end_to_end() { test_pull_package_end_to_end("1oom", Some("trixie"), None, None).await; } #[tokio::test] async fn test_pull_agg_svn_fallback_ok() { test_pull_package_end_to_end("agg", Some("trixie"), None, None).await; } #[tokio::test] async fn test_pull_hello_debian_latest_end_to_end() { test_pull_package_end_to_end("hello", None, Some("debian"), None).await; } #[tokio::test] async fn test_pull_hello_ubuntu_latest_end_to_end() { test_pull_package_end_to_end("hello", None, Some("ubuntu"), None).await; } }