From ab35af5fb5ecf7911a048e12dcd2388ae6c1ceed Mon Sep 17 00:00:00 2001 From: Valentin Haudiquet Date: Wed, 14 Jan 2026 21:18:25 +0100 Subject: [PATCH] package_info: refactor into distro_info and package_info split, yaml data --- Cargo.toml | 2 + distro_info.yml | 26 ++++++ src/distro_info.rs | 185 +++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 2 + src/package_info.rs | 195 ++------------------------------------------ src/pull.rs | 2 +- 6 files changed, 224 insertions(+), 188 deletions(-) create mode 100644 distro_info.yml create mode 100644 src/distro_info.rs diff --git a/Cargo.toml b/Cargo.toml index 4cb9855..787b788 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,8 @@ serde_json = "1.0.145" directories = "6.0.0" ssh2 = "0.9.5" gpgme = "0.11" +serde_yaml = "0.9" +lazy_static = "1.4.0" [dev-dependencies] test-log = "0.2.19" diff --git a/distro_info.yml b/distro_info.yml new file mode 100644 index 0000000..87db24e --- /dev/null +++ b/distro_info.yml @@ -0,0 +1,26 @@ +## Static data needed for pkh operations +## Instead of hardcoding the data in code, data files allow us to quickly +## update and maintain such data in one unique place +## The goal is to have the minimal possible set of data necessary +## to grab the actual data. For example we don't want to store every Ubuntu +## or Debian series, but rather a URL where we can properly access that data. 
+dist_info: + local: /usr/share/distro-info/{dist} + network: https://salsa.debian.org/debian/distro-info-data/-/raw/main/ +dist: + debian: + base_url: http://deb.debian.org/debian + pockets: + - proposed-updates + - updates + series: + local: /usr/share/distro-info/debian.csv + network: https://salsa.debian.org/debian/distro-info-data/-/raw/main/debian.csv + ubuntu: + base_url: http://archive.ubuntu.com/ubuntu + pockets: + - proposed + - updates + series: + local: /usr/share/distro-info/ubuntu.csv + network: https://salsa.debian.org/debian/distro-info-data/-/raw/main/ubuntu.csv \ No newline at end of file diff --git a/src/distro_info.rs b/src/distro_info.rs new file mode 100644 index 0000000..397a4f3 --- /dev/null +++ b/src/distro_info.rs @@ -0,0 +1,185 @@ +use chrono::NaiveDate; +use lazy_static::lazy_static; +use serde::Deserialize; +use std::error::Error; +use std::path::Path; + +#[derive(Debug, Deserialize)] +struct SeriesInfo { + local: String, + network: String, +} + +#[derive(Debug, Deserialize)] +struct DistData { + base_url: String, + pockets: Vec, + series: SeriesInfo, +} + +#[derive(Debug, Deserialize)] +struct Data { + dist: std::collections::HashMap, +} + +const DATA_YAML: &str = include_str!("../distro_info.yml"); +lazy_static! 
{ + static ref DATA: Data = serde_yaml::from_str(DATA_YAML).unwrap(); +} + +fn parse_series_csv(content: &str) -> Result, Box> { + let mut rdr = csv::ReaderBuilder::new() + .flexible(true) + .from_reader(content.as_bytes()); + + let headers = rdr.headers()?.clone(); + let series_idx = headers + .iter() + .position(|h| h == "series") + .ok_or("Column 'series' not found")?; + let created_idx = headers + .iter() + .position(|h| h == "created") + .ok_or("Column 'created' not found")?; + + let mut entries = Vec::new(); + for result in rdr.records() { + let record = result?; + if let (Some(s), Some(c)) = (record.get(series_idx), record.get(created_idx)) + && let Ok(date) = NaiveDate::parse_from_str(c, "%Y-%m-%d") + { + entries.push((s.to_string(), date)); + } + } + + // Sort by date descending (newest first) + entries.sort_by(|a, b| b.1.cmp(&a.1)); + + Ok(entries.into_iter().map(|(s, _)| s).collect()) +} + +/// Get time-ordered list of series for a distribution, development series first; returns an error for an unknown distribution +pub async fn get_ordered_series(dist: &str) -> Result, Box> { + let series_info = &DATA.dist.get(dist).ok_or_else(|| format!("Unknown distribution: {dist}"))?.series; + let content = if Path::new(series_info.local.as_str()).exists() { + std::fs::read_to_string(series_info.local.as_str())? + } else { + reqwest::get(series_info.network.as_str()) + .await? + .text() + .await? + }; + + let mut series = parse_series_csv(&content)?; + + // For Debian, ensure 'sid' is first if it's not + // We want to try 'sid' (unstable) first for Debian. + if dist == "debian" { + series.retain(|s| s != "sid"); + series.insert(0, "sid".to_string()); + } + + Ok(series) +} + +/// Obtain the distribution (eg. debian, ubuntu) from a distribution series (eg. noble, bookworm) +pub async fn get_dist_from_series(series: &str) -> Result> { + for dist in DATA.dist.keys() { + if get_ordered_series(dist) + .await? 
+ .contains(&series.to_string()) + { + return Ok(dist.to_string()); + } + } + Err(format!("Unknown series: {}", series).into()) +} + +/// Get the package pockets available for a given distribution (panics if the distribution is not in the embedded data) +/// +/// Example: get_dist_pockets(ubuntu) => ["proposed", "updates", ""] +pub fn get_dist_pockets(dist: &str) -> Vec { + let mut pockets = DATA.dist.get(dist).unwrap().pockets.clone(); + + // Explicitly add 'main' pocket, which is just the empty string + pockets.push("".to_string()); + + pockets +} + +/// Get the sources URL for a distribution, series, pocket, and component +pub fn get_sources_url(base_url: &str, series: &str, pocket: &str, component: &str) -> String { + let pocket_full = if pocket.is_empty() { + String::new() + } else { + format!("-{}", pocket) + }; + format!("{base_url}/dists/{series}{pocket_full}/{component}/source/Sources.gz") +} + +/// Get the archive base URL for a distribution (panics if the distribution is not in the embedded data) +/// +/// Example: ubuntu => http://archive.ubuntu.com/ubuntu +pub fn get_base_url(dist: &str) -> String { + DATA.dist.get(dist).unwrap().base_url.clone() +} + +/// Obtain the URL for the 'Release' file of a distribution series +fn get_release_url(base_url: &str, series: &str, pocket: &str) -> String { + let pocket_full = if pocket.is_empty() { + String::new() + } else { + format!("-{}", pocket) + }; + format!("{base_url}/dists/{series}{pocket_full}/Release") +} + +/// Obtain the components of a distribution series by parsing the 'Release' file +pub async fn get_components( + base_url: &str, + series: &str, + pocket: &str, +) -> Result, Box> { + let url = get_release_url(base_url, series, pocket); + log::debug!("Fetching Release file from: {}", url); + + let content = reqwest::get(&url).await?.text().await?; + + for line in content.lines() { + if line.starts_with("Components:") + && let Some((_, components)) = line.split_once(':') + { + return Ok(components + .split_whitespace() + .map(|s| s.to_string()) + .collect()); + } + } + + Err("Components not found.".into()) +} + 
+#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_get_debian_series() { + let series = get_ordered_series("debian").await.unwrap(); + assert!(series.contains(&"sid".to_string())); + assert!(series.contains(&"bookworm".to_string())); + } + + #[tokio::test] + async fn test_get_ubuntu_series() { + let series = get_ordered_series("ubuntu").await.unwrap(); + assert!(series.contains(&"noble".to_string())); + assert!(series.contains(&"jammy".to_string())); + } + + #[tokio::test] + async fn test_get_dist_from_series() { + assert_eq!(get_dist_from_series("sid").await.unwrap(), "debian"); + assert_eq!(get_dist_from_series("noble").await.unwrap(), "ubuntu"); + } +} diff --git a/src/lib.rs b/src/lib.rs index 03bdf34..e5ac7ba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,8 @@ pub mod build; pub mod changelog; /// Build a Debian package into a binary (.deb) pub mod deb; +/// Obtain general information about distribution, series, etc +pub mod distro_info; /// Obtain information about one or multiple packages pub mod package_info; /// Download a source package locally diff --git a/src/package_info.rs b/src/package_info.rs index 3113523..9980176 100644 --- a/src/package_info.rs +++ b/src/package_info.rs @@ -1,16 +1,11 @@ -use chrono::NaiveDate; use flate2::read::GzDecoder; use std::collections::HashMap; use std::error::Error; use std::io::Read; -use std::path::Path; use crate::ProgressCallback; use log::{debug, warn}; -const BASE_URL_UBUNTU: &str = "http://archive.ubuntu.com/ubuntu"; -const BASE_URL_DEBIAN: &str = "http://deb.debian.org/debian"; - async fn check_launchpad_repo(package: &str) -> Result, Box> { let url = format!("https://git.launchpad.net/ubuntu/+source/{}", package); let client = reqwest::Client::builder() @@ -25,100 +20,6 @@ async fn check_launchpad_repo(package: &str) -> Result, Box Result, Box> { - let mut rdr = csv::ReaderBuilder::new() - .flexible(true) - .from_reader(content.as_bytes()); - - let headers = 
rdr.headers()?.clone(); - let series_idx = headers - .iter() - .position(|h| h == "series") - .ok_or("Column 'series' not found")?; - let created_idx = headers - .iter() - .position(|h| h == "created") - .ok_or("Column 'created' not found")?; - - let mut entries = Vec::new(); - for result in rdr.records() { - let record = result?; - if let (Some(s), Some(c)) = (record.get(series_idx), record.get(created_idx)) - && let Ok(date) = NaiveDate::parse_from_str(c, "%Y-%m-%d") - { - entries.push((s.to_string(), date)); - } - } - - // Sort by date descending (newest first) - entries.sort_by(|a, b| b.1.cmp(&a.1)); - - Ok(entries.into_iter().map(|(s, _)| s).collect()) -} - -/// Get time-ordered list of series for a distribution, development series first -pub async fn get_ordered_series(dist: &str) -> Result, Box> { - let content = if Path::new(format!("/usr/share/distro-info/{dist}.csv").as_str()).exists() { - std::fs::read_to_string(format!("/usr/share/distro-info/{dist}.csv"))? - } else { - reqwest::get( - format!("https://salsa.debian.org/debian/distro-info-data/-/raw/main/{dist}.csv") - .as_str(), - ) - .await? - .text() - .await? - }; - - let mut series = parse_series_csv(&content)?; - - // For Debian, ensure 'sid' is first if it's not - // We want to try 'sid' (unstable) first for Debian. 
- if dist == "debian" { - series.retain(|s| s != "sid"); - series.insert(0, "sid".to_string()); - } - - Ok(series) -} - -// Keep existing functions for compatibility or refactor them to use get_ordered_series -async fn get_series_from_url(url: &str) -> Result, Box> { - let content = reqwest::get(url).await?.text().await?; - parse_series_csv(&content) -} - -fn get_series_from_file(path: &str) -> Result, Box> { - let content = std::fs::read_to_string(path)?; - parse_series_csv(&content) -} - -/// Obtain a list of series from a distribution -pub async fn get_dist_series(dist: &str) -> Result, Box> { - if Path::new(format!("/usr/share/distro-info/{dist}.csv").as_str()).exists() { - get_series_from_file(format!("/usr/share/distro-info/{dist}.csv").as_str()) - } else { - get_series_from_url( - format!("https://salsa.debian.org/debian/distro-info-data/-/raw/main/{dist}.csv") - .as_str(), - ) - .await - } -} - -/// Obtain the distribution (eg. debian, ubuntu) from a distribution series (eg. noble, bookworm) -pub async fn get_dist_from_series(series: &str) -> Result> { - let debian_series = get_dist_series("debian").await?; - if debian_series.contains(&series.to_string()) { - return Ok("debian".to_string()); - } - let ubuntu_series = get_dist_series("ubuntu").await?; - if ubuntu_series.contains(&series.to_string()) { - return Ok("ubuntu".to_string()); - } - Err(format!("Unknown series: {}", series).into()) -} - /// A File used in a source package #[derive(Debug, Clone)] pub struct FileEntry { @@ -173,66 +74,6 @@ impl PackageInfo { } } -fn get_dist_pockets(dist: &str) -> Vec<&'static str> { - match dist { - "ubuntu" => vec!["proposed", "updates", ""], - "debian" => vec!["proposed-updates", "updates", ""], - _ => vec![""], - } -} - -fn get_sources_url(base_url: &str, series: &str, pocket: &str, component: &str) -> String { - let pocket_full = if pocket.is_empty() { - String::new() - } else { - format!("-{}", pocket) - }; - 
format!("{base_url}/dists/{series}{pocket_full}/{component}/source/Sources.gz") -} - -fn get_base_url(dist: &str) -> &str { - match dist { - "ubuntu" => BASE_URL_UBUNTU, - "debian" => BASE_URL_DEBIAN, - _ => panic!("Unknown distribution"), - } -} - -/// Obtain the URL for the 'Release' file of a distribution series -fn get_release_url(base_url: &str, series: &str, pocket: &str) -> String { - let pocket_full = if pocket.is_empty() { - String::new() - } else { - format!("-{}", pocket) - }; - format!("{base_url}/dists/{series}{pocket_full}/Release") -} - -/// Obtain the components of a distribution series by parsing the 'Release' file -async fn get_components( - base_url: &str, - series: &str, - pocket: &str, -) -> Result, Box> { - let url = get_release_url(base_url, series, pocket); - debug!("Fetching Release file from: {}", url); - - let content = reqwest::get(&url).await?.text().await?; - - for line in content.lines() { - if line.starts_with("Components:") - && let Some((_, components)) = line.split_once(':') - { - return Ok(components - .split_whitespace() - .map(|s| s.to_string()) - .collect()); - } - } - - Err("Components not found.".into()) -} - struct DebianSources { splitted_sources: std::str::Split<'static, &'static str>, } @@ -338,7 +179,7 @@ async fn get( pocket: &str, version: Option<&str>, ) -> Result> { - let dist = get_dist_from_series(series).await?; + let dist = crate::distro_info::get_dist_from_series(series).await?; // Handle Ubuntu case: Vcs-Git does not usually point to Launchpad but Salsa // We need to check manually if there is a launchpad repository for the package @@ -350,13 +191,13 @@ async fn get( preferred_vcs = Some(lp_url); } - let base_url = get_base_url(&dist); + let base_url = crate::distro_info::get_base_url(&dist); - let components = get_components(base_url, series, pocket).await?; + let components = crate::distro_info::get_components(&base_url, series, pocket).await?; debug!("Found components: {:?}", components); for component in 
components { - let url = get_sources_url(base_url, series, pocket, &component); + let url = crate::distro_info::get_sources_url(&base_url, series, pocket, &component); debug!("Fetching sources from: {}", url); @@ -413,7 +254,7 @@ async fn find_package( version: Option<&str>, progress: ProgressCallback<'_>, ) -> Result> { - let series_list = get_ordered_series(dist).await?; + let series_list = crate::distro_info::get_ordered_series(dist).await?; for (i, series) in series_list.iter().enumerate() { if let Some(cb) = progress { @@ -421,13 +262,13 @@ async fn find_package( } let pockets = if pocket.is_empty() { - get_dist_pockets(dist) + crate::distro_info::get_dist_pockets(dist) } else { - vec![pocket] + vec![pocket.to_string()] }; for p in pockets { - match get(package_name, series, p, version).await { + match get(package_name, series, &p, version).await { Ok(info) => { if i > 0 { warn!( @@ -525,26 +366,6 @@ mod tests { assert!(url.is_none()); } - #[tokio::test] - async fn test_get_debian_series() { - let series = get_dist_series("debian").await.unwrap(); - assert!(series.contains(&"sid".to_string())); - assert!(series.contains(&"bookworm".to_string())); - } - - #[tokio::test] - async fn test_get_ubuntu_series() { - let series = get_dist_series("ubuntu").await.unwrap(); - assert!(series.contains(&"noble".to_string())); - assert!(series.contains(&"jammy".to_string())); - } - - #[tokio::test] - async fn test_get_dist_from_series() { - assert_eq!(get_dist_from_series("sid").await.unwrap(), "debian"); - assert_eq!(get_dist_from_series("noble").await.unwrap(), "ubuntu"); - } - #[test] fn test_parse_sources() { use flate2::Compression; diff --git a/src/pull.rs b/src/pull.rs index 17e14ae..12049dd 100644 --- a/src/pull.rs +++ b/src/pull.rs @@ -464,7 +464,7 @@ pub async fn pull( // Depending on target series, we pick target branch; if latest series is specified, // we target the development branch, i.e. 
the default branch - let branch_name = if crate::package_info::get_ordered_series(package_info.dist.as_str()) + let branch_name = if crate::distro_info::get_ordered_series(package_info.dist.as_str()) .await?[0] != *series {