// pkh/src/pull.rs
use std::cmp::min;
use std::error::Error;
use std::path::Path;
use crate::package_info;
use crate::package_info::PackageInfo;
use std::process::Command;
use log::debug;
use regex::Regex;
use crate::ProgressCallback;
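/// Clone the git repository at `url` into `<cwd>/<package>` (or `./<package>`
/// when `cwd` is `None`), optionally checking out `branch`, and report clone
/// progress through the optional `progress` callback.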
fn clone_repo(
url: &str,
package: &str,
branch: Option<&str>,
cwd: Option<&Path>,
progress: ProgressCallback<'_>,
) -> Result<(), Box<dyn Error>> {
let target_path = if let Some(path) = cwd {
path.join(package)
} else {
Path::new(package).to_path_buf()
};
let mut callbacks = git2::RemoteCallbacks::new();
if let Some(ref progress_cb) = progress {
// Download progress
callbacks.transfer_progress(move |stats| {
(progress_cb)(
"",
"Receiving objects...",
stats.received_objects(),
stats.total_objects(),
);
true
});
// Remote progress: sideband messages such as 'Compressing objects: 10% (34/340)'
// Parse the progress information so it can be forwarded through the callback
// Compile the regex once instead of on every sideband message
let re = Regex::new(r"(.*):[ ]*([0-9]*)% \(([0-9]*)/([0-9]*)\)").unwrap();
callbacks.sideband_progress(move |data| {
let msg = String::from_utf8_lossy(data);
if let Some(caps) = re.captures(msg.trim()) {
let msg = caps.get(1).map_or("", |m| m.as_str()).to_string();
let objects = caps
.get(3)
.map_or("", |m| m.as_str())
.to_string()
.parse::<usize>()
.unwrap_or(0);
let total = caps
.get(4)
.map_or("", |m| m.as_str())
.to_string()
.parse::<usize>()
.unwrap_or(0);
(progress_cb)("", msg.as_str(), objects, total);
}
true
});
}
let mut fetch_options = git2::FetchOptions::new();
fetch_options.remote_callbacks(callbacks);
let mut builder = git2::build::RepoBuilder::new();
builder.fetch_options(fetch_options);
if let Some(b) = branch {
builder.branch(b);
}
match builder.clone(url, &target_path) {
Ok(_repo) => Ok(()),
Err(e) => Err(format!("Failed to clone: {}", e).into()),
}
}
use sha2::{Digest, Sha256};
use std::fs::File;
use std::io::Write;
use flate2::read::GzDecoder;
use futures_util::StreamExt;
use tar::Archive;
use xz2::read::XzDecoder;
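/// Unpack the `.tar.gz`/`.tgz` or `.tar.xz`/`.txz` archive at `path` into
/// `dest`.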
fn extract_archive(path: &Path, dest: &Path) -> Result<(), Box<dyn Error>> {
let file = File::open(path)?;
let filename = path.file_name().unwrap().to_string_lossy();
if filename.ends_with(".tar.gz") || filename.ends_with(".tgz") {
let tar = GzDecoder::new(file);
let mut archive = Archive::new(tar);
archive.unpack(dest)?;
} else if filename.ends_with(".tar.xz") || filename.ends_with(".txz") {
let tar = XzDecoder::new(file);
let mut archive = Archive::new(tar);
archive.unpack(dest)?;
} else {
return Err(format!("Unsupported archive format: {}", filename).into());
}
Ok(())
}
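/// Run `pristine-tar checkout` inside `package_dir` to regenerate the tarball
/// `filename` in the parent directory of the repository.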
fn checkout_pristine_tar(package_dir: &Path, filename: &str) -> Result<(), Box<dyn Error>> {
let output = Command::new("pristine-tar")
.current_dir(package_dir)
.args(["checkout", format!("../{filename}").as_str()])
// Propagate spawn failures (e.g. pristine-tar not installed) instead of
// panicking, so the caller can fall back to downloading the tarball
.output()?;
if !output.status.success() {
return Err(format!(
"pristine-tar checkout failed with status: {}",
output.status
)
.into());
}
Ok(())
}
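/// Download `url` into `target_dir` (using the file name taken from the URL),
/// report progress through the optional callback, and verify the downloaded
/// data against the expected SHA-256 `checksum`.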
async fn download_file_checksum(
url: &str,
checksum: &str,
target_dir: &Path,
progress: ProgressCallback<'_>,
) -> Result<(), Box<dyn Error>> {
// Download with reqwest
let response = reqwest::get(url).await?;
if !response.status().is_success() {
return Err(format!("Failed to download '{}' : {}", &url, response.status()).into());
}
let total_size = response
.content_length()
.ok_or(format!("Failed to get content length from '{}'", &url))?;
let mut index = 0;
// Target file: extract file name from URL
let filename = Path::new(url).file_name().unwrap().to_str().unwrap();
let path = target_dir.join(filename);
let mut file = File::create(path)?;
// Download chunk by chunk to disk, while updating hasher for checksum
let mut stream = response.bytes_stream();
let mut hasher = Sha256::new();
while let Some(item) = stream.next().await {
let chunk = item?;
file.write_all(&chunk)?;
hasher.update(&chunk);
if let Some(cb) = progress {
index = min(index + chunk.len(), total_size as usize);
cb("", "Downloading...", index, total_size as usize);
}
}
// Verify checksum
let result = hasher.finalize();
let calculated_checksum = hex::encode(result);
if calculated_checksum != checksum {
return Err(format!(
"Checksum mismatch! Expected {}, got {}",
checksum, calculated_checksum
)
.into());
}
Ok(())
}
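/// Make sure the repository in `package_dir` has a local `pristine-tar`
/// branch, creating one that tracks the remote `<dist>/pristine-tar` branch
/// if such a branch exists.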
fn setup_pristine_tar_branch(package_dir: &Path, dist: &str) -> Result<(), Box<dyn Error>> {
let repo = git2::Repository::open(package_dir)?;
// Check if local branch already exists
if repo
.find_branch("pristine-tar", git2::BranchType::Local)
.is_ok()
{
return Ok(());
}
// Find remote pristine-tar branch
let branches = repo.branches(Some(git2::BranchType::Remote))?;
for branch_result in branches {
let (branch, _) = branch_result?;
if let Some(name) = branch.name()?
&& name.ends_with(&format!("/{dist}/pristine-tar"))
{
debug!("Found remote pristine-tar branch: {}", name);
let commit = branch.get().peel_to_commit()?;
// Create local branch
let mut local_branch = repo.branch("pristine-tar", &commit, false)?;
// Set upstream
local_branch.set_upstream(Some(name))?;
debug!("Created local pristine-tar branch tracking {}", name);
return Ok(());
}
}
debug!("No remote pristine-tar branch found.");
Ok(())
}
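/// Obtain the orig tarball for the package described by `info`: first try to
/// regenerate it with pristine-tar from the cloned repository, and fall back
/// to downloading it from the archive (with checksum verification) if that
/// fails.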
async fn fetch_orig_tarball(
info: &PackageInfo,
cwd: Option<&Path>,
progress: ProgressCallback<'_>,
) -> Result<(), Box<dyn Error>> {
let package_dir = if let Some(path) = cwd {
path.join(&info.stanza.package)
} else {
Path::new(&info.stanza.package).to_path_buf()
};
// Find the orig tarball in the file list
// Usually ends with .orig.tar.gz or .orig.tar.xz
let orig_file = info
.stanza
.files
.iter()
.find(|f| f.name.contains(".orig.tar."))
.ok_or("No orig tarball found in the package file list")?;
let filename = &orig_file.name;
// 1. Try executing pristine-tar
// Setup pristine-tar branch if needed (by tracking remote branch)
let _ = setup_pristine_tar_branch(&package_dir, info.dist.as_str());
if let Err(e) = checkout_pristine_tar(&package_dir, filename.as_str()) {
debug!(
"pristine-tar failed: {}. Falling back to archive download.",
e
);
// 2. Fall back to downloading from the archive.
// The tarball goes next to the package repository (its parent directory),
// where Debian build tools expect it; when cwd is None the current
// directory already plays that role.
let target_dir = cwd.unwrap_or(Path::new("."));
download_file_checksum(
format!("{}/{}", &info.archive_url, filename).as_str(),
&orig_file.sha256,
target_dir,
progress,
)
.await?;
}
Ok(())
}
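/// Archive fallback: download every file listed for the package into
/// `<cwd>/<package>`, verifying checksums, and extract the debian tarball
/// (if any) into `<cwd>/<package>/<package>`.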
async fn fetch_archive_sources(
info: &PackageInfo,
cwd: Option<&Path>,
progress: ProgressCallback<'_>,
) -> Result<(), Box<dyn Error>> {
let package_dir = if let Some(path) = cwd {
path.join(&info.stanza.package)
} else {
Path::new(&info.stanza.package).to_path_buf()
};
std::fs::create_dir_all(&package_dir)?;
for file in &info.stanza.files {
let url = format!("{}/{}", info.archive_url, file.name);
download_file_checksum(&url, &file.sha256, &package_dir, progress).await?;
}
// Extract the debian tarball or diff
let debian_file = info
.stanza
.files
.iter()
.find(|f| f.name.contains(".debian.tar.") || f.name.contains(".diff.gz"));
if let Some(file) = debian_file {
let path = package_dir.join(&file.name);
let extract_dir = package_dir.join(&info.stanza.package);
if (file.name.ends_with(".tar.xz") || file.name.ends_with(".tar.gz"))
&& let Err(e) = extract_archive(&path, &extract_dir)
{
return Err(format!("Failed to extract {}: {}", file.name, e).into());
}
// Remove archive after extraction
std::fs::remove_file(&path)?;
}
Ok(())
}
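/// Pull the source package `package`: resolve its metadata for the given
/// `series`/`pocket` (or search all series of `dist` when no series is given),
/// then clone its preferred VCS repository if one is known, falling back to
/// downloading the sources from the archive otherwise. For non-native packages
/// the orig tarball is fetched as well. An empty `_version` means no specific
/// version is requested.
///
/// Illustrative usage (a sketch mirroring the end-to-end tests below; the
/// `pkh::pull` module path is an assumption, not defined in this file):
///
/// ```ignore
/// // Pull src:hello from Ubuntu noble into the current directory, without a
/// // progress callback.
/// let info = pkh::pull::pull("hello", "", Some("noble"), "", "", None, None, None).await?;
/// println!("Pulled {} from {}", info.stanza.package, info.dist);
/// ```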
pub async fn pull(
package: &str,
_version: &str,
series: Option<&str>,
pocket: &str,
_ppa: &str,
dist: Option<&str>,
cwd: Option<&Path>,
progress: ProgressCallback<'_>,
) -> Result<PackageInfo, Box<dyn Error>> {
let version_opt = if _version.is_empty() {
None
} else {
Some(_version)
};
/* Obtain the package information, either directly in a series or with a search in all series */
let package_info = if let Some(s) = series {
if let Some(cb) = progress {
cb(
&format!("Resolving package info for {}...", package),
"",
0,
0,
);
}
// Get the package information from that series and pocket
package_info::get(package, s, pocket, version_opt).await?
} else {
let dist = dist.unwrap_or_else(|| {
// Auto-detect the host distribution: use ubuntu if lsb_release reports it,
// otherwise default to debian
let detected = std::process::Command::new("lsb_release")
.arg("-i")
.arg("-s")
.output()
.map(|o| String::from_utf8_lossy(&o.stdout).trim().to_lowercase())
.unwrap_or_default();
if detected == "ubuntu" { "ubuntu" } else { "debian" }
});
if let Some(cb) = progress {
cb(
&format!("Searching for package {} in {}...", package, dist),
"",
0,
0,
);
}
// Try to find the package in all series from that dist
package_info::find_package(package, dist, pocket, version_opt, progress).await?
};
let package_dir = if let Some(path) = cwd {
path.join(package)
} else {
Path::new(package).to_path_buf()
};
/* Fetch the package: either via git (preferred VCS) or the archive */
if let Some(ref url) = package_info.preferred_vcs {
// We have found a preferred VCS (git repository) for the package, so
// we fetch the package from that repo.
// Depending on target series, we pick target branch; if no series is specified,
// we target the development branch, i.e. the default branch
let branch_name = if let Some(s) = series {
if package_info.dist == "ubuntu" {
Some(format!("{}/{}", package_info.dist, s))
} else {
// Debian does not have reliable branch naming...
// For now, we skip that part and clone default
// TODO: Inspect remote branches and tags for matches
None
}
} else {
None
};
if let Some(cb) = progress {
cb(
&format!(
"Cloning {}{}...",
url,
if let Some(b) = &branch_name {
format!(" (branch {})", b)
} else {
String::new()
}
),
"",
0,
0,
);
}
clone_repo(
url.as_str(),
package,
branch_name.as_deref(),
Some(&package_dir),
progress,
)?;
if !package_info.is_native() {
if let Some(cb) = progress {
cb("Fetching orig tarball...", "", 0, 0);
}
fetch_orig_tarball(&package_info, Some(&package_dir), progress).await?;
} else {
debug!("Native package, skipping orig tarball fetch.");
}
} else {
// Fallback to archive fetching
if let Some(cb) = progress {
cb("Downloading from archive...", "", 0, 0);
}
fetch_archive_sources(&package_info, Some(cwd.unwrap_or(Path::new("."))), progress).await?;
}
Ok(package_info)
}
#[cfg(test)]
mod tests {
use super::*;
async fn test_pull_package_end_to_end(package: &str, series: Option<&str>, dist: Option<&str>) {
// This test verifies that 'pkh pull' clones the repo and fetches the tarball.
// For determinism, tests must specify either a distro or a series:
// with neither, pull would fall back to the distro of the host system.
assert!(dist.is_some() || series.is_some());
// Use a temp directory as working directory
let temp_dir = tempfile::tempdir().unwrap();
let cwd = temp_dir.path();
// Main 'pull' command: the one we want to test
let info = pull(package, "", series, "", "", dist, Some(cwd), None)
.await
.unwrap();
let package_dir = cwd.join(package);
assert!(package_dir.exists());
let package_source_dir = package_dir.join(package);
assert!(
package_source_dir.exists(),
"Package source directory not created"
);
assert!(
package_source_dir.join("debian").exists(),
"debian directory not present"
);
if package_source_dir.join(".git").exists() {
// Verify we are on the correct branch
let repo = git2::Repository::open(&package_source_dir).unwrap();
let head = repo.head().unwrap();
let name = head.name().unwrap();
if let Some(s) = series {
// The local branch should be named dist/series
// We skip debian for now as it does not have a reliable naming scheme
if info.dist == "ubuntu" {
assert_eq!(name, format!("refs/heads/{0}/{s}", info.dist));
}
} else {
// The local branch should be named ubuntu/devel for Ubuntu
// Debian unfortunately does not have a reliable naming scheme
// Given that there was no series specified, and this is a test,
// we require to have a distribution specified
if dist.unwrap() == "ubuntu" {
assert_eq!(name, "refs/heads/ubuntu/devel");
}
}
}
// Check for orig tarball in package dir
let mut found_tarball = false;
for entry in std::fs::read_dir(package_dir).unwrap() {
let entry = entry.unwrap();
let name = entry.file_name().to_string_lossy().to_string();
if name.contains(".orig.tar.") {
found_tarball = true;
break;
}
}
assert!(found_tarball, "Orig tarball not found in package dir");
}
#[tokio::test]
async fn test_pull_hello_ubuntu_end_to_end() {
test_pull_package_end_to_end("hello", Some("noble"), None).await;
}
#[tokio::test]
async fn test_pull_hello_debian_end_to_end() {
test_pull_package_end_to_end("hello", Some("bookworm"), None).await;
}
#[tokio::test]
async fn test_pull_2048_universe_ubuntu_end_to_end() {
test_pull_package_end_to_end("2048", Some("noble"), None).await;
}
#[tokio::test]
async fn test_pull_1oom_contrib_debian_end_to_end() {
test_pull_package_end_to_end("1oom", Some("trixie"), None).await;
}
#[tokio::test]
async fn test_pull_agg_svn_fallback_ok() {
test_pull_package_end_to_end("agg", Some("trixie"), None).await;
}
#[tokio::test]
async fn test_pull_hello_debian_latest_end_to_end() {
test_pull_package_end_to_end("hello", None, Some("debian")).await;
}
#[tokio::test]
async fn test_pull_hello_ubuntu_latest_end_to_end() {
test_pull_package_end_to_end("hello", None, Some("ubuntu")).await;
}
}