From c466ad184644535568aac0a1209bbf8f589078da Mon Sep 17 00:00:00 2001
From: Valentin Haudiquet
Date: Wed, 26 Nov 2025 00:22:09 +0100
Subject: [PATCH] Initial commit

- pkh created
- basic 'get' command to obtain a source package
---
 .gitignore          |   2 +
 Cargo.toml          |  20 +++
 src/get.rs          | 228 +++++++++++++++++++++++++++++++++
 src/lib.rs          |   1 +
 src/main.rs         |  71 +++++++++++
 src/package_info.rs | 298 ++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 620 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.toml
 create mode 100644 src/get.rs
 create mode 100644 src/lib.rs
 create mode 100644 src/main.rs
 create mode 100644 src/package_info.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3e2f2a8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.lock
+target
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..88380d9
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "pkh"
+version = "0.1.0"
+edition = "2021"
+authors = ["vhaudiquet"]
+
+[dependencies]
+clap = { version = "4.5.51", features = ["cargo"] }
+cmd_lib = "2.0.0"
+flate2 = "1.1.5"
+serde = { version = "1.0.228", features = ["derive"] }
+csv = "1.3.0"
+reqwest = { version = "0.12.9", features = ["blocking", "json"] }
+git2 = "0.19.0"
+tokio = { version = "1.41.1", features = ["full"] }
+sha2 = "0.10.8"
+hex = "0.4.3"
+
+[dev-dependencies]
+tempfile = "3.10.1"
diff --git a/src/get.rs b/src/get.rs
new file mode 100644
index 0000000..875e7ab
--- /dev/null
+++ b/src/get.rs
@@ -0,0 +1,228 @@
+use serde::Deserialize;
+use std::error::Error;
+use std::path::Path;
+
+use pkh::package_info;
+use pkh::package_info::PackageInfo;
+
+use std::process::Command;
+
+/// Clone the git repository at `url` into `<cwd>/<package>`, or into
+/// `./<package>` when `cwd` is `None`.
+fn clone_repo(url: &str, package: &str, cwd: Option<&Path>) -> Result<(), Box<dyn Error>> {
+    println!("Cloning {} into {}...", url, package);
+
+    let target_path = if let Some(path) = cwd {
+        path.join(package)
+    } else {
+        Path::new(package).to_path_buf()
+    };
+
+    git2::Repository::clone(url, target_path)?;
+    Ok(())
+}
+
+use sha2::{Sha256, Digest};
+use std::fs::File;
+use std::io::Write;
+
+/// Run `pristine-tar checkout ../<filename>` from inside `package_dir`.
+///
+/// Fails if the command cannot be spawned or exits unsuccessfully.
+fn checkout_pristine_tar(package_dir: &Path, filename: &str) -> Result<(), Box<dyn Error>> {
+    println!("Attempting to checkout {} using pristine-tar...", filename);
+    let status = Command::new("pristine-tar")
+        .current_dir(package_dir)
+        .args(&["checkout", format!("../{filename}").as_str()])
+        .status()?;
+
+    if !status.success() {
+        return Err(format!("pristine-tar checkout failed with status: {}", status).into());
+    }
+    Ok(())
+}
+
+/// Download `url`, verify its SHA-256 digest against the hex string `checksum`,
+/// and write the content to `target_dir` under the URL's last path component.
+///
+/// Nothing is written to disk when the HTTP status is not a success, the digest
+/// does not match, or no file name can be derived from the URL.
+async fn download_file_checksum(url: &str, checksum: &str, target_dir: &Path) -> Result<(), Box<dyn Error>> {
+    // Download with reqwest
+    let response = reqwest::get(url).await?;
+    if !response.status().is_success() {
+        return Err(format!("Failed to download: {}", response.status()).into());
+    }
+
+    let content = response.bytes().await?;
+
+    // Verify checksum
+    let mut hasher = Sha256::new();
+    hasher.update(&content);
+    let result = hasher.finalize();
+    let calculated_checksum = hex::encode(result);
+
+    if calculated_checksum != checksum {
+        return Err(format!("Checksum mismatch! Expected {}, got {}", checksum, calculated_checksum).into());
+    }
+
+    // Extract file name from URL
+    let filename = Path::new(url)
+        .file_name()
+        .and_then(|name| name.to_str())
+        .ok_or_else(|| format!("Cannot extract a file name from URL: {}", url))?;
+
+    // Write to disk
+    let path = target_dir.join(filename);
+    let mut file = File::create(path)?;
+    file.write_all(&content)?;
+
+    Ok(())
+}
+
+/// Ensure a local `pristine-tar` branch exists in the repository at `package_dir`.
+///
+/// When there is none yet, the first remote branch whose name ends with
+/// `/<dist>/pristine-tar` is used to create it and is set as its upstream.
+/// Finding no such remote branch is not an error.
+fn setup_pristine_tar_branch(package_dir: &Path, dist: &str) -> Result<(), Box<dyn Error>> {
+    let repo = git2::Repository::open(package_dir)?;
+
+    // Check if local branch already exists
+    if repo.find_branch("pristine-tar", git2::BranchType::Local).is_ok() {
+        return Ok(());
+    }
+
+    // Find remote pristine-tar branch
+    let branches = repo.branches(Some(git2::BranchType::Remote))?;
+    for branch_result in branches {
+        let (branch, _) = branch_result?;
+        if let Some(name) = branch.name()? {
+            if name.ends_with(&format!("/{dist}/pristine-tar")) {
+                println!("Found remote pristine-tar branch: {}", name);
+
+                let commit = branch.get().peel_to_commit()?;
+
+                // Create local branch
+                let mut local_branch = repo.branch("pristine-tar", &commit, false)?;
+
+                // Set upstream
+                local_branch.set_upstream(Some(name))?;
+
+                println!("Created local pristine-tar branch tracking {}", name);
+                return Ok(());
+            }
+        }
+    }
+
+    println!("No remote pristine-tar branch found.");
+    Ok(())
+}
+
+/// Obtain the orig tarball listed in `info` and place it next to the package checkout.
+///
+/// `pristine-tar` is tried first; if that fails, the tarball is downloaded from
+/// the archive and verified against the SHA-256 recorded in the package stanza.
+///
+/// Returns an error when the stanza lists no `.orig.tar.*` file.
+async fn fetch_orig_tarball(info: &PackageInfo, cwd: Option<&Path>) -> Result<(), Box<dyn Error>> {
+    let package_dir = if let Some(path) = cwd {
+        path.join(&info.stanza.package)
+    } else {
+        Path::new(&info.stanza.package).to_path_buf()
+    };
+
+    // Find the orig tarball in the file list
+    // Usually ends with .orig.tar.gz or .orig.tar.xz
+    let orig_file = info.stanza.files.iter()
+        .find(|f| f.name.contains(".orig.tar."))
+        .ok_or_else(|| format!("No orig tarball listed for package {}", info.stanza.package))?;
+    let filename = &orig_file.name;
+
+    // 1. Try executing pristine-tar
+
+    // Setup pristine-tar branch if needed (by tracking remote branch)
+    // Best effort: a failure here only means pristine-tar is likely to fail below,
+    // in which case we fall back to downloading from the archive.
+    if let Err(e) = setup_pristine_tar_branch(&package_dir, info.dist.as_str()) {
+        println!("Could not set up pristine-tar branch: {}", e);
+    }
+
+    if let Err(e) = checkout_pristine_tar(&package_dir, filename.as_str()) {
+        println!("pristine-tar failed: {}. Falling back to archive download.", e);
+
+        // 2. Fallback to archive download
+        // We download to the parent directory of the package repo (which is standard for build tools)
+        // or the current directory if cwd is None (which effectively is the parent of the package dir)
+        let target_dir = cwd.unwrap_or_else(|| Path::new("."));
+        download_file_checksum(format!("{}/{}", &info.archive_url, filename).as_str(), &orig_file.sha256, target_dir).await?;
+    }
+
+    Ok(())
+}
+
+/// Fetch the source of `package` for `series`/`pocket`: clone its preferred VCS
+/// repository, then retrieve the matching orig tarball.
+///
+/// `_version` and `_ppa` are currently unused. Errors from the package lookup,
+/// the clone or the tarball retrieval are returned to the caller; a package that
+/// is missing from the archive index or has no VCS URL is only reported on
+/// stdout.
+pub async fn get(package: &str, _version: &str, series: &str, pocket: &str, _ppa: &str, cwd: Option<&Path>) -> Result<(), Box<dyn Error>> {
+    let Some(info) = package_info::get(package, series, pocket).await? else {
+        println!("Package {} not found in series {}", package, series);
+        return Ok(());
+    };
+
+    let Some(url) = info.preferred_vcs.as_deref() else {
+        println!("No VCS URL found for package {}", package);
+        return Ok(());
+    };
+
+    clone_repo(url, package, cwd)?;
+    fetch_orig_tarball(&info, cwd).await?;
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    async fn test_get_package_end_to_end(package: &str, series: &str) {
+        // This test verifies that 'pkh get' clones the repo and fetches the tarball.
+
+        // Use a temp directory as working directory
+        let temp_dir = tempfile::tempdir().unwrap();
+        let cwd = temp_dir.path();
+
+        // Main 'get' command: the one we want to test
+        get(package, "", series, "", "", Some(cwd)).await.unwrap();
+
+        let package_dir = cwd.join(package);
+        assert!(package_dir.exists(), "Package directory not created");
+        assert!(package_dir.join(".git").exists(), "Git repo not cloned");
+
+        // Check for orig tarball in parent dir (cwd)
+        let mut found_tarball_in_parent = false;
+        for entry in std::fs::read_dir(cwd).unwrap() {
+            let entry = entry.unwrap();
+            let name = entry.file_name().to_string_lossy().to_string();
+            if name.contains(".orig.tar.") {
+                found_tarball_in_parent = true;
+                break;
+            }
+        }
+
+        assert!(found_tarball_in_parent, "Orig tarball not found in parent dir");
+    }
+
+    #[tokio::test]
+    async fn test_get_hello_ubuntu_end_to_end() {
+        test_get_package_end_to_end("hello", "noble").await;
+    }
+    #[tokio::test]
+    async fn test_get_hello_debian_end_to_end() {
+        test_get_package_end_to_end("hello", "bookworm").await;
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..124d198
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1 @@
+pub mod package_info;
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..e4fc72a
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,71 @@
+use std::env;
+use std::error::Error;
+use std::collections::HashMap;
+
+extern crate serde;
+use serde::Deserialize;
+
+extern crate clap;
+use clap::{arg, command, value_parser, ArgAction, Command};
+
+extern crate flate2;
+
+extern crate cmd_lib;
+use cmd_lib::{run_cmd};
+
+mod get;
+use get::get;
+
+/// Entry point: parse the command line and dispatch to the selected subcommand.
+fn main() {
+    let rt = tokio::runtime::Runtime::new().unwrap();
+    let matches = command!()
+        .subcommand_required(true)
+        .disable_version_flag(true)
+        .subcommand(
+            Command::new("get")
+                .about("Get a source package from the archive or git")
+                .arg(
+                    arg!(-s --series <series> "Target package distribution series")
+                    .required(false)
+                )
+                .arg(
+                    arg!(-v --version <version> "Target package version")
+                    .required(false)
+                )
+                .arg(
+                    arg!(--ppa <ppa> "Download the package from a specific PPA")
+                    .required(false)
+                )
+                .arg(arg!(<package> "Target package"))
+        )
+        .subcommand(
+            Command::new("changelog")
+                .about("Auto-generate changelog entry, editing it, committing it afterwards")
+                .arg(arg!(-s --series <series> "Target distribution series").required(false))
+                .arg(arg!(--backport "This changelog is for a backport entry").required(false))
+                .arg(arg!(-v --version <version> "Target version").required(false))
+
+        )
+        .get_matches();
+
+    match matches.subcommand() {
+        Some(("get", sub_matches)) => {
+            let package = sub_matches.get_one::<String>("package").expect("required");
+            let series = sub_matches.get_one::<String>("series").map(|s| s.as_str()).unwrap_or("");
+            let version = sub_matches.get_one::<String>("version").map(|s| s.as_str()).unwrap_or("");
+            let ppa = sub_matches.get_one::<String>("ppa").map(|s| s.as_str()).unwrap_or("");
+
+            // Since get is async, we need to block on it
+            if let Err(e) = rt.block_on(get(package, version, series, "", ppa, None)) {
+                eprintln!("Error: {}", e);
+                std::process::exit(1);
+            }
+        },
+        Some(("changelog", _sub_matches)) => {
+            eprintln!("Error: the 'changelog' subcommand is not implemented yet");
+            std::process::exit(1);
+        },
+        _ => unreachable!("Exhausted list of subcommands and subcommand_required prevents `None`"),
+    }
+}
\ No newline at end of file
diff --git a/src/package_info.rs b/src/package_info.rs
new file mode 100644
index 0000000..0733f6e
--- /dev/null
+++ b/src/package_info.rs
@@ -0,0 +1,298 @@
+use flate2::read::GzDecoder;
+use std::io::Read;
+use std::collections::HashMap;
+use serde::Deserialize;
+use std::error::Error;
+use std::path::Path;
+
+const BASE_URL_UBUNTU: &str = "http://archive.ubuntu.com/ubuntu";
+const BASE_URL_DEBIAN: &str = "http://deb.debian.org/debian";
+
+/// Probe Launchpad for a git repository of the Ubuntu source `package`.
+///
+/// Sends a `HEAD` request without following redirects and returns the repository
+/// URL when the response status is a success, `None` otherwise.
+async fn check_launchpad_repo(package: &str) -> Result<Option<String>, Box<dyn Error>> {
+    let url = format!("https://git.launchpad.net/ubuntu/+source/{}", package);
+    let client = reqwest::Client::builder()
+        .redirect(reqwest::redirect::Policy::none())
+        .build()?;
+    let response = client.head(&url).send().await?;
+
+    if response.status().is_success() {
+        Ok(Some(url))
+    } else {
+        Ok(None)
+    }
+}
+
+/// Extract every value of the `series` column from a distro-info CSV reader.
+fn read_series_column<R: Read>(mut rdr: csv::Reader<R>) -> Result<Vec<String>, Box<dyn Error>> {
+    let series_idx = rdr
+        .headers()?
+        .iter()
+        .position(|h| h == "series")
+        .ok_or("Column 'series' not found")?;
+
+    let mut series = Vec::new();
+    for result in rdr.records() {
+        let record = result?;
+        if let Some(s) = record.get(series_idx) {
+            series.push(s.to_string());
+        }
+    }
+    Ok(series)
+}
+
+/// Download the distro-info CSV at `url` and return its `series` column.
+async fn get_series_from_url(url: &str) -> Result<Vec<String>, Box<dyn Error>> {
+    let content = reqwest::get(url).await?.error_for_status()?.text().await?;
+    let rdr = csv::ReaderBuilder::new()
+        .flexible(true)
+        .from_reader(content.as_bytes());
+    read_series_column(rdr)
+}
+
+/// Read the distro-info CSV at `path` and return its `series` column.
+fn get_series_from_file(path: &str) -> Result<Vec<String>, Box<dyn Error>> {
+    let rdr = csv::ReaderBuilder::new()
+        .flexible(true)
+        .from_path(path)?;
+    read_series_column(rdr)
+}
+
+/// Return the series known for `dist`, preferring the local distro-info
+/// database and falling back to the copy hosted on salsa.debian.org.
+pub async fn get_dist_series(dist: &str) -> Result<Vec<String>, Box<dyn Error>> {
+    let local_csv = format!("/usr/share/distro-info/{dist}.csv");
+    if Path::new(&local_csv).exists() {
+        get_series_from_file(&local_csv)
+    } else {
+        get_series_from_url(format!("https://salsa.debian.org/debian/distro-info-data/-/raw/main/{dist}.csv").as_str()).await
+    }
+}
+
+/// Map a series name to its distribution (`"debian"` or `"ubuntu"`).
+///
+/// Debian is checked first; an unknown series is an error.
+async fn get_dist_from_series(series: &str) -> Result<String, Box<dyn Error>> {
+    for dist in ["debian", "ubuntu"] {
+        if get_dist_series(dist).await?.iter().any(|s| s == series) {
+            return Ok(dist.to_string());
+        }
+    }
+    Err(format!("Unknown series: {}", series).into())
+}
+
+/// One entry of a stanza's `Checksums-Sha256` field.
+#[derive(Debug, Clone)]
+pub struct FileEntry {
+    pub name: String,
+    pub size: u64,
+    pub sha256: String,
+}
+
+/// The fields of a `Sources` stanza that pkh uses.
+#[derive(Debug)]
+pub struct PackageStanza {
+    pub package: String,
+    pub version: String,
+    pub directory: String,
+    pub vcs_git: Option<String>,
+    pub vcs_browser: Option<String>,
+    pub files: Vec<FileEntry>,
+}
+
+
+/// A source package located in a distribution archive.
+#[derive(Debug)]
+pub struct PackageInfo {
+    pub dist: String,
+    pub stanza: PackageStanza,
+    pub preferred_vcs: Option<String>,
+    pub archive_url: String
+}
+
+/// Build the URL of the `Sources.gz` index for a series/pocket/component.
+fn get_sources_url(base_url: &str, series: &str, pocket: &str, component: &str) -> String {
+    let pocket_full = if pocket.is_empty() { String::new() } else { format!("-{}", pocket) };
+    format!("{base_url}/dists/{series}{pocket_full}/{component}/source/Sources.gz")
+}
+
+/// Return the archive base URL for `dist`.
+///
+/// # Panics
+///
+/// Panics if `dist` is neither `"ubuntu"` nor `"debian"`.
+fn get_base_url(dist: &str) -> &'static str {
+    match dist {
+        "ubuntu" => BASE_URL_UBUNTU,
+        "debian" => BASE_URL_DEBIAN,
+        _ => panic!("Unknown distribution"),
+    }
+}
+
+/// Parse gzip-compressed `Sources` index data and return the first stanza whose
+/// `Package` field equals `target_package`, or `None` if there is none.
+///
+/// File entries are taken from the `Checksums-Sha256` field.
+fn parse_sources(data: &[u8], target_package: &str) -> Result<Option<PackageStanza>, Box<dyn Error>> {
+    let mut d = GzDecoder::new(data);
+    let mut s = String::new();
+    d.read_to_string(&mut s)?;
+
+    for stanza in s.split("\n\n") {
+        let mut fields: HashMap<String, String> = HashMap::new();
+        let mut current_key = String::new();
+
+        for line in stanza.lines() {
+            if line.is_empty() { continue; }
+
+            if line.starts_with(' ') || line.starts_with('\t') {
+                // Continuation line
+                if let Some(val) = fields.get_mut(&current_key) {
+                    val.push('\n');
+                    val.push_str(line.trim());
+                }
+            } else if let Some((key, value)) = line.split_once(':') {
+                current_key = key.trim().to_string();
+                fields.insert(current_key.clone(), value.trim().to_string());
+            }
+        }
+
+        if let Some(pkg) = fields.get("Package") {
+            if pkg == target_package {
+                let mut files = Vec::new();
+                if let Some(checksums) = fields.get("Checksums-Sha256") {
+                    for line in checksums.lines() {
+                        let parts: Vec<&str> = line.split_whitespace().collect();
+                        if parts.len() >= 3 {
+                            files.push(FileEntry {
+                                sha256: parts[0].to_string(),
+                                size: parts[1].parse().unwrap_or(0),
+                                name: parts[2].to_string(),
+                            });
+                        }
+                    }
+                }
+
+                return Ok(Some(PackageStanza {
+                    package: pkg.clone(),
+                    version: fields.get("Version").cloned().unwrap_or_default(),
+                    directory: fields.get("Directory").cloned().unwrap_or_default(),
+                    vcs_git: fields.get("Vcs-Git").cloned(),
+                    vcs_browser: fields.get("Vcs-Browser").cloned(),
+                    files,
+                }));
+            }
+        }
+    }
+
+    Ok(None)
+}
+
+/// Look up `package_name` in the `main` component of `series` (with `-<pocket>`
+/// appended when `pocket` is non-empty) and return its archive and VCS details.
+///
+/// For Ubuntu series, a Launchpad repository takes precedence over the stanza's
+/// `Vcs-Git` field when one exists. Returns `Ok(None)` when the package is not
+/// listed in the index.
+pub async fn get(package_name: &str, series: &str, pocket: &str) -> Result<Option<PackageInfo>, Box<dyn Error>> {
+    let dist = get_dist_from_series(series).await?;
+
+    // Handle Ubuntu case: Vcs-Git does not usually point to Launchpad but Salsa
+    // We need to check manually if there is a launchpad repository for the package
+    let mut preferred_vcs = None;
+    if dist == "ubuntu" {
+        if let Some(lp_url) = check_launchpad_repo(package_name).await? {
+            println!("Found Launchpad URL: {}", lp_url);
+            preferred_vcs = Some(lp_url);
+        }
+    }
+
+    let base_url = get_base_url(&dist);
+
+    let component = "main"; // TODO: Make configurable or detect
+    let url = get_sources_url(base_url, series, pocket, component);
+
+    println!("Fetching sources from: {}", url);
+
+    let response = reqwest::get(&url).await?.error_for_status()?;
+    let compressed_data = response.bytes().await?;
+
+    println!("Downloaded Sources.gz for {}/{}", dist, series);
+
+    let Some(stanza) = parse_sources(&compressed_data, package_name)? else {
+        return Ok(None);
+    };
+
+    // Fall back to the stanza's Vcs-Git when no Launchpad repository was found.
+    let preferred_vcs = preferred_vcs.or_else(|| stanza.vcs_git.clone());
+    let archive_url = format!("{base_url}/{0}", stanza.directory);
+
+    Ok(Some(PackageInfo {
+        dist,
+        stanza,
+        preferred_vcs,
+        archive_url,
+    }))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[tokio::test]
+    async fn test_check_launchpad_repo() {
+        // "hello" should exist on Launchpad for Ubuntu
+        let url = check_launchpad_repo("hello").await.unwrap();
+        assert!(url.is_some());
+        assert_eq!(url.unwrap(), "https://git.launchpad.net/ubuntu/+source/hello");
+
+        // "this-package-should-not-exist-12345" should not exist
+        let url = check_launchpad_repo("this-package-should-not-exist-12345").await.unwrap();
+        assert!(url.is_none());
+    }
+
+    #[tokio::test]
+    async fn test_get_debian_series() {
+        let series = get_dist_series("debian").await.unwrap();
+        assert!(series.contains(&"sid".to_string()));
+        assert!(series.contains(&"bookworm".to_string()));
+    }
+
+    #[tokio::test]
+    async fn test_get_ubuntu_series() {
+        let series = get_dist_series("ubuntu").await.unwrap();
+        assert!(series.contains(&"noble".to_string()));
+        assert!(series.contains(&"jammy".to_string()));
+    }
+
+    #[tokio::test]
+    async fn test_get_dist_from_series() {
+        assert_eq!(get_dist_from_series("sid").await.unwrap(), "debian");
+        assert_eq!(get_dist_from_series("noble").await.unwrap(), "ubuntu");
+    }
+
+    #[test]
+    fn test_parse_sources() {
+        use flate2::write::GzEncoder;
+        use flate2::Compression;
+        use std::io::Write;
+
+        let data = "Package: hello\nVersion: 2.10-2\nDirectory: pool/main/h/hello\nVcs-Git: https://salsa.debian.org/debian/hello.git\n\nPackage: other\nVersion: 1.0\n";
+
+        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
+        encoder.write_all(data.as_bytes()).unwrap();
+        let compressed = encoder.finish().unwrap();
+
+        let info = parse_sources(&compressed, "hello").unwrap().unwrap();
+        assert_eq!(info.package, "hello");
+        assert_eq!(info.version, "2.10-2");
+        assert_eq!(info.directory, "pool/main/h/hello");
+        assert_eq!(info.vcs_git.unwrap(), "https://salsa.debian.org/debian/hello.git");
+
+        let none = parse_sources(&compressed, "missing").unwrap();
+        assert!(none.is_none());
+    }
+}