Initial commit

- pkh created
- basic 'get' command to obtain a source package
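
An example invocation of the new command (package and series borrowed from the end-to-end tests):

    pkh get hello -s noble

This clones the packaging repository into ./hello and places the orig tarball next to it.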
2025-11-26 00:22:09 +01:00
commit c466ad1846
6 changed files with 564 additions and 0 deletions

2
.gitignore vendored Normal file

@@ -0,0 +1,2 @@
*.lock
target

20
Cargo.toml Normal file

@@ -0,0 +1,20 @@
[package]
name = "pkh"
version = "0.1.0"
edition = "2021"
authors = ["vhaudiquet"]

[dependencies]
clap = { version = "4.5.51", features = ["cargo"] }
cmd_lib = "2.0.0"
flate2 = "1.1.5"
serde = { version = "1.0.228", features = ["derive"] }
csv = "1.3.0"
reqwest = { version = "0.12.9", features = ["blocking", "json"] }
git2 = "0.19.0"
tokio = { version = "1.41.1", features = ["full"] }
sha2 = "0.10.8"
hex = "0.4.3"

[dev-dependencies]
tempfile = "3.10.1"

191
src/get.rs Normal file

@@ -0,0 +1,191 @@
use pkh::package_info::{self, PackageInfo};
use sha2::{Digest, Sha256};
use std::error::Error;
use std::fs::File;
use std::io::Write;
use std::path::Path;
use std::process::Command;
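/// Clone the git repository at `url` into `<cwd>/<package>` (or `./<package>` when no working directory is given).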
fn clone_repo(url: &str, package: &str, cwd: Option<&Path>) -> Result<(), Box<dyn Error>> {
println!("Cloning {} into {}...", url, package);
let target_path = if let Some(path) = cwd {
path.join(package)
} else {
Path::new(package).to_path_buf()
};
git2::Repository::clone(url, target_path)?;
Ok(())
}
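/// Run `pristine-tar checkout` inside the package repository to regenerate the orig tarball in its parent directory.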
fn checkout_pristine_tar(package_dir: &Path, filename: &str) -> Result<(), Box<dyn Error>> {
println!("Attempting to checkout {} using pristine-tar...", filename);
let status = Command::new("pristine-tar")
.current_dir(package_dir)
.args(&["checkout", format!("../{filename}").as_str()])
.status()?;
if !status.success() {
return Err(format!("pristine-tar checkout failed with status: {}", status).into());
}
Ok(())
}
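/// Download a file from `url`, verify its SHA-256 checksum, and write it into `target_dir` under its original file name.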
async fn download_file_checksum(url: &str, checksum: &str, target_dir: &Path) -> Result<(), Box<dyn Error>> {
// Download with reqwest
let response = reqwest::get(url).await?;
if !response.status().is_success() {
return Err(format!("Failed to download: {}", response.status()).into());
}
let content = response.bytes().await?;
// Verify checksum
let mut hasher = Sha256::new();
hasher.update(&content);
let result = hasher.finalize();
let calculated_checksum = hex::encode(result);
if calculated_checksum != checksum {
return Err(format!("Checksum mismatch! Expected {}, got {}", checksum, calculated_checksum).into());
}
    // Extract file name from URL
    let filename = Path::new(url)
        .file_name()
        .and_then(|n| n.to_str())
        .ok_or("Could not determine file name from URL")?;
// Write to disk
let path = target_dir.join(filename);
let mut file = File::create(path)?;
file.write_all(&content)?;
Ok(())
}
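/// Ensure a local `pristine-tar` branch exists, creating one that tracks a remote branch ending in `/<dist>/pristine-tar` if one is found.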
fn setup_pristine_tar_branch(package_dir: &Path, dist: &str) -> Result<(), Box<dyn Error>> {
let repo = git2::Repository::open(package_dir)?;
// Check if local branch already exists
if repo.find_branch("pristine-tar", git2::BranchType::Local).is_ok() {
return Ok(());
}
// Find remote pristine-tar branch
let branches = repo.branches(Some(git2::BranchType::Remote))?;
for branch_result in branches {
let (branch, _) = branch_result?;
if let Some(name) = branch.name()? {
if name.ends_with(&format!("/{dist}/pristine-tar")) {
println!("Found remote pristine-tar branch: {}", name);
let commit = branch.get().peel_to_commit()?;
// Create local branch
let mut local_branch = repo.branch("pristine-tar", &commit, false)?;
// Set upstream
local_branch.set_upstream(Some(name))?;
println!("Created local pristine-tar branch tracking {}", name);
return Ok(());
}
}
}
println!("No remote pristine-tar branch found.");
Ok(())
}
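/// Obtain the orig tarball for the package: try `pristine-tar` first, then fall back to downloading it from the archive and verifying its checksum.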
async fn fetch_orig_tarball(info: &PackageInfo, cwd: Option<&Path>) -> Result<(), Box<dyn Error>> {
let package_dir = if let Some(path) = cwd {
path.join(&info.stanza.package)
} else {
Path::new(&info.stanza.package).to_path_buf()
};
// Find the orig tarball in the file list
// Usually ends with .orig.tar.gz or .orig.tar.xz
    let orig_file = info.stanza.files.iter().find(|f| {
        f.name.contains(".orig.tar.")
    }).ok_or("No orig tarball found in the package file list")?;
let filename = &orig_file.name;
// 1. Try executing pristine-tar
// Setup pristine-tar branch if needed (by tracking remote branch)
    if let Err(e) = setup_pristine_tar_branch(&package_dir, info.dist.as_str()) {
        println!("Could not set up pristine-tar branch: {}", e);
    }
if let Err(e) = checkout_pristine_tar(&package_dir, filename.as_str()) {
println!("pristine-tar failed: {}. Falling back to archive download.", e);
// 2. Fallback to archive download
// We download to the parent directory of the package repo (which is standard for build tools)
// or the current directory if cwd is None (which effectively is the parent of the package dir)
let target_dir = cwd.unwrap_or_else(|| Path::new("."));
download_file_checksum(format!("{}/{}", &info.archive_url, filename).as_str(), &orig_file.sha256, target_dir).await?;
}
Ok(())
}
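/// Entry point for the `get` subcommand: clone the packaging repository and fetch the orig tarball next to it.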
pub async fn get(package: &str, _version: &str, series: &str, pocket: &str, _ppa: &str, cwd: Option<&Path>) -> Result<(), Box<dyn Error>> {
    match package_info::get(package, series, pocket).await? {
        Some(info) => {
            if let Some(ref url) = info.preferred_vcs {
                clone_repo(url.as_str(), package, cwd)?;
                fetch_orig_tarball(&info, cwd).await?;
            } else {
                println!("No VCS URL found for package {}", package);
            }
        }
        None => println!("Package {} not found in the archive", package),
    }
    Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
async fn test_get_package_end_to_end(package: &str, series: &str) {
// This test verifies that 'pkh get' clones the repo and fetches the tarball.
// Use a temp directory as working directory
let temp_dir = tempfile::tempdir().unwrap();
let cwd = temp_dir.path();
// Main 'get' command: the one we want to test
get(package, "", series, "", "", Some(cwd)).await.unwrap();
let package_dir = cwd.join(package);
assert!(package_dir.exists(), "Package directory not created");
assert!(package_dir.join(".git").exists(), "Git repo not cloned");
// Check for orig tarball in parent dir (cwd)
let mut found_tarball_in_parent = false;
for entry in std::fs::read_dir(cwd).unwrap() {
let entry = entry.unwrap();
let name = entry.file_name().to_string_lossy().to_string();
if name.contains(".orig.tar.") {
found_tarball_in_parent = true;
break;
}
}
assert!(found_tarball_in_parent, "Orig tarball not found in parent dir");
}
#[tokio::test]
async fn test_get_hello_ubuntu_end_to_end() {
test_get_package_end_to_end("hello", "noble").await;
}
#[tokio::test]
async fn test_get_hello_debian_end_to_end() {
test_get_package_end_to_end("hello", "bookworm").await;
}
}

1
src/lib.rs Normal file

@@ -0,0 +1 @@
pub mod package_info;

68
src/main.rs Normal file

@@ -0,0 +1,68 @@
use clap::{arg, command, Command};

mod get;
use get::get;
fn main() {
let rt = tokio::runtime::Runtime::new().unwrap();
let matches = command!()
.subcommand_required(true)
.disable_version_flag(true)
.subcommand(
Command::new("get")
.about("Get a source package from the archive or git")
.arg(
arg!(-s --series <series> "Target package distribution series")
.required(false)
)
.arg(
arg!(-v --version <version> "Target package version")
.required(false)
)
.arg(
arg!(--ppa <ppa> "Download the package from a specific PPA")
.required(false)
)
.arg(arg!(<package> "Target package"))
)
.subcommand(
Command::new("changelog")
.about("Auto-generate changelog entry, editing it, committing it afterwards")
.arg(arg!(-s --series <series> "Target distribution series").required(false))
.arg(arg!(--backport "This changelog is for a backport entry").required(false))
.arg(arg!(-v --version <version> "Target version").required(false))
)
.get_matches();
match matches.subcommand() {
Some(("get", sub_matches)) => {
let package = sub_matches.get_one::<String>("package").expect("required");
let series = sub_matches.get_one::<String>("series").map(|s| s.as_str()).unwrap_or("");
let version = sub_matches.get_one::<String>("version").map(|s| s.as_str()).unwrap_or("");
let ppa = sub_matches.get_one::<String>("ppa").map(|s| s.as_str()).unwrap_or("");
// Since get is async, we need to block on it
if let Err(e) = rt.block_on(get(package, version, series, "", ppa, None)) {
eprintln!("Error: {}", e);
std::process::exit(1);
}
},
Some(("changelog", _sub_matches)) => {
},
_ => unreachable!("Exhausted list of subcommands and subcommand_required prevents `None`"),
}
}

282
src/package_info.rs Normal file

@@ -0,0 +1,282 @@
use flate2::read::GzDecoder;
use std::io::Read;
use std::collections::HashMap;
use std::error::Error;
use std::path::Path;
const BASE_URL_UBUNTU: &str = "http://archive.ubuntu.com/ubuntu";
const BASE_URL_DEBIAN: &str = "http://deb.debian.org/debian";
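/// Check whether `https://git.launchpad.net/ubuntu/+source/<package>` exists, returning the URL if a HEAD request succeeds.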
async fn check_launchpad_repo(package: &str) -> Result<Option<String>, Box<dyn Error>> {
let url = format!("https://git.launchpad.net/ubuntu/+source/{}", package);
let client = reqwest::Client::builder()
.redirect(reqwest::redirect::Policy::none())
.build()?;
let response = client.head(&url).send().await?;
if response.status().is_success() {
Ok(Some(url))
} else {
Ok(None)
}
}
async fn get_series_from_url(url: &str) -> Result<Vec<String>, Box<dyn Error>> {
let content = reqwest::get(url).await?.text().await?;
let mut rdr = csv::ReaderBuilder::new()
.flexible(true)
.from_reader(content.as_bytes());
let headers = rdr.headers()?.clone();
let series_idx = headers.iter().position(|h| h == "series").ok_or("Column 'series' not found")?;
let mut series = Vec::new();
for result in rdr.records() {
let record = result?;
if let Some(s) = record.get(series_idx) {
series.push(s.to_string());
}
}
Ok(series)
}
fn get_series_from_file(path: &str) -> Result<Vec<String>, Box<dyn Error>> {
let mut rdr = csv::ReaderBuilder::new()
.flexible(true)
.from_path(path)?;
let headers = rdr.headers()?.clone();
let series_idx = headers.iter().position(|h| h == "series").ok_or("Column 'series' not found")?;
let mut series = Vec::new();
for result in rdr.records() {
let record = result?;
if let Some(s) = record.get(series_idx) {
series.push(s.to_string());
}
}
Ok(series)
}
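/// Return all series names for a distribution, read from the local distro-info CSV if present, otherwise fetched from Salsa.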
pub async fn get_dist_series(dist: &str) -> Result<Vec<String>, Box<dyn Error>> {
if Path::new(format!("/usr/share/distro-info/{dist}.csv").as_str()).exists() {
get_series_from_file(format!("/usr/share/distro-info/{dist}.csv").as_str())
} else {
get_series_from_url(format!("https://salsa.debian.org/debian/distro-info-data/-/raw/main/{dist}.csv").as_str()).await
}
}
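/// Map a series name (e.g. "bookworm", "noble") to its distribution ("debian" or "ubuntu").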
async fn get_dist_from_series(series: &str) -> Result<String, Box<dyn Error>> {
let debian_series = get_dist_series("debian").await?;
if debian_series.contains(&series.to_string()) {
return Ok("debian".to_string());
}
let ubuntu_series = get_dist_series("ubuntu").await?;
if ubuntu_series.contains(&series.to_string()) {
return Ok("ubuntu".to_string());
}
Err(format!("Unknown series: {}", series).into())
}
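/// A single source file listed in a package stanza (name, size, SHA-256 checksum).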
#[derive(Debug, Clone)]
pub struct FileEntry {
pub name: String,
pub size: u64,
pub sha256: String,
}
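/// The fields of one package stanza parsed from a Sources index.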
#[derive(Debug)]
pub struct PackageStanza {
pub package: String,
pub version: String,
pub directory: String,
pub vcs_git: Option<String>,
pub vcs_browser: Option<String>,
pub files: Vec<FileEntry>,
}
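/// Everything `get` needs about a source package: the distribution it belongs to, its stanza, the preferred VCS URL, and the archive directory URL.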
#[derive(Debug)]
pub struct PackageInfo {
pub dist: String,
pub stanza: PackageStanza,
pub preferred_vcs: Option<String>,
pub archive_url: String
}
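/// Build the URL of the `Sources.gz` index for a given series, pocket and component.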
fn get_sources_url(base_url: &str, series: &str, pocket: &str, component: &str) -> String {
let pocket_full = if pocket.is_empty() { String::new() } else { format!("-{}", pocket) };
format!("{base_url}/dists/{series}{pocket_full}/{component}/source/Sources.gz")
}
fn get_base_url(dist: &str) -> &str {
match dist {
"ubuntu" => BASE_URL_UBUNTU,
"debian" => BASE_URL_DEBIAN,
_ => panic!("Unknown distribution"),
}
}
/*
* Parse a 'Sources.gz' debian package file data, to look for a target package and
* return the data for that package stanza
*/
fn parse_sources(data: &[u8], target_package: &str) -> Result<Option<PackageStanza>, Box<dyn Error>> {
let mut d = GzDecoder::new(data);
let mut s = String::new();
d.read_to_string(&mut s)?;
for stanza in s.split("\n\n") {
let mut fields: HashMap<String, String> = HashMap::new();
let mut current_key = String::new();
for line in stanza.lines() {
if line.is_empty() { continue; }
if line.starts_with(' ') || line.starts_with('\t') {
// Continuation line
if let Some(val) = fields.get_mut(&current_key) {
val.push('\n');
val.push_str(line.trim());
}
} else if let Some((key, value)) = line.split_once(':') {
current_key = key.trim().to_string();
fields.insert(current_key.clone(), value.trim().to_string());
}
}
if let Some(pkg) = fields.get("Package") {
if pkg == target_package {
let mut files = Vec::new();
if let Some(checksums) = fields.get("Checksums-Sha256") {
for line in checksums.lines() {
let parts: Vec<&str> = line.split_whitespace().collect();
if parts.len() >= 3 {
files.push(FileEntry {
sha256: parts[0].to_string(),
size: parts[1].parse().unwrap_or(0),
name: parts[2].to_string(),
});
}
}
}
return Ok(Some(PackageStanza {
package: pkg.clone(),
version: fields.get("Version").cloned().unwrap_or_default(),
directory: fields.get("Directory").cloned().unwrap_or_default(),
vcs_git: fields.get("Vcs-Git").cloned(),
vcs_browser: fields.get("Vcs-Browser").cloned(),
files,
}));
}
}
}
Ok(None)
}
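/// Look up a source package in the archive for the given series/pocket and return its parsed info, preferring a Launchpad VCS URL on Ubuntu.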
pub async fn get(package_name: &str, series: &str, pocket: &str) -> Result<Option<PackageInfo>, Box<dyn Error>> {
let dist = get_dist_from_series(series).await?;
// Handle Ubuntu case: Vcs-Git does not usually point to Launchpad but Salsa
// We need to check manually if there is a launchpad repository for the package
let mut preferred_vcs = None;
if dist == "ubuntu" {
if let Some(lp_url) = check_launchpad_repo(package_name).await? {
println!("Found Launchpad URL: {}", lp_url);
preferred_vcs = Some(lp_url);
}
}
let base_url = get_base_url(&dist);
let component = "main"; // TODO: Make configurable or detect
let url = get_sources_url(base_url, series, pocket, component);
println!("Fetching sources from: {}", url);
    let response = reqwest::get(&url).await?.error_for_status()?;
    let compressed_data = response.bytes().await?;
println!("Downloaded Sources.gz for {}/{}", dist, series);
if let Some(stanza) = parse_sources(&compressed_data, package_name)? {
if let Some(vcs) = &stanza.vcs_git {
if preferred_vcs.is_none() {
preferred_vcs = Some(vcs.clone());
}
}
        let archive_url = format!("{base_url}/{}", stanza.directory);
        return Ok(Some(PackageInfo {
            dist,
            stanza,
            preferred_vcs,
            archive_url,
        }));
} else {
// println!("Package '{}' not found in {}/{}", package, dist, series);
Ok(None)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn test_check_launchpad_repo() {
// "hello" should exist on Launchpad for Ubuntu
let url = check_launchpad_repo("hello").await.unwrap();
assert!(url.is_some());
assert_eq!(url.unwrap(), "https://git.launchpad.net/ubuntu/+source/hello");
// "this-package-should-not-exist-12345" should not exist
let url = check_launchpad_repo("this-package-should-not-exist-12345").await.unwrap();
assert!(url.is_none());
}
#[tokio::test]
async fn test_get_debian_series() {
let series = get_dist_series("debian").await.unwrap();
assert!(series.contains(&"sid".to_string()));
assert!(series.contains(&"bookworm".to_string()));
}
#[tokio::test]
async fn test_get_ubuntu_series() {
let series = get_dist_series("ubuntu").await.unwrap();
assert!(series.contains(&"noble".to_string()));
assert!(series.contains(&"jammy".to_string()));
}
#[tokio::test]
async fn test_get_dist_from_series() {
assert_eq!(get_dist_from_series("sid").await.unwrap(), "debian");
assert_eq!(get_dist_from_series("noble").await.unwrap(), "ubuntu");
}
#[test]
fn test_parse_sources() {
use flate2::write::GzEncoder;
use flate2::Compression;
use std::io::Write;
let data = "Package: hello\nVersion: 2.10-2\nDirectory: pool/main/h/hello\nVcs-Git: https://salsa.debian.org/debian/hello.git\n\nPackage: other\nVersion: 1.0\n";
let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
encoder.write_all(data.as_bytes()).unwrap();
let compressed = encoder.finish().unwrap();
let info = parse_sources(&compressed, "hello").unwrap().unwrap();
assert_eq!(info.package, "hello");
assert_eq!(info.version, "2.10-2");
assert_eq!(info.directory, "pool/main/h/hello");
assert_eq!(info.vcs_git.unwrap(), "https://salsa.debian.org/debian/hello.git");
let none = parse_sources(&compressed, "missing").unwrap();
assert!(none.is_none());
}
}