use crate::veprintln; use anyhow::{Context, Result}; use std::fs::File; use std::io::{BufReader, Read}; use std::path::Path; /// Extract a tarball to the specified directory pub fn extract_tarball(tarball: &Path, dest: &Path) -> Result<()> { let filename = tarball.file_name().and_then(|n| n.to_str()).unwrap_or(""); // OCI bundle files are always named "oci-…" by generate_cache_filename. // Use the filename prefix as a zero-I/O dispatch signal instead of // scanning the archive for a layers.manifest entry (which required // reading the entire compressed archive twice for large OCI images). if filename.starts_with("oci-") { veprintln!("Detected multi-layer OCI image, extracting layers..."); return extract_multi_layer_oci(tarball, dest); } let file = File::open(tarball) .with_context(|| format!("Failed to open tarball: {}", tarball.display()))?; let reader = BufReader::new(file); // Detect compression format from filename if filename.ends_with(".tar.gz") || filename.ends_with(".tgz") { extract_gz(reader, dest)?; } else if filename.ends_with(".tar.xz") || filename.ends_with(".txz") { extract_xz(reader, dest)?; } else if filename.ends_with(".tar.zst") || filename.ends_with(".tar.zstd") { extract_zst(reader, dest)?; } else if filename.ends_with(".tar") { extract_tar(reader, dest)?; } else { // Try to detect from magic bytes let mut magic = [0u8; 6]; let mut peek_reader = BufReader::new(File::open(tarball)?); peek_reader.read_exact(&mut magic)?; match magic { [0x1f, 0x8b, ..] => { // gzip magic drop(peek_reader); let file = File::open(tarball)?; extract_gz(BufReader::new(file), dest)?; } [0xfd, b'7', b'z', b'X', b'Z', 0x00] => { // xz magic drop(peek_reader); let file = File::open(tarball)?; extract_xz(BufReader::new(file), dest)?; } [0x28, 0xb5, 0x2f, 0xfd, ..] => { // zstd magic drop(peek_reader); let file = File::open(tarball)?; extract_zst(BufReader::new(file), dest)?; } _ => { // Assume uncompressed tar drop(peek_reader); let file = File::open(tarball)?; extract_tar(BufReader::new(file), dest)?; } } } Ok(()) } /// Extract a multi-layer OCI image fn extract_multi_layer_oci(tarball: &Path, dest: &Path) -> Result<()> { let filename = tarball.file_name().and_then(|n| n.to_str()).unwrap_or(""); // Extract the outer OCI bundle (layers.manifest + layer_N.tar.gz files) // into a temp directory. This is our own format, not an OCI layer, so // plain unpack is fine here. let temp_dir = tempfile::tempdir().context("Failed to create temp directory for OCI layers")?; let file = File::open(tarball) .with_context(|| format!("Failed to open OCI tarball: {}", tarball.display()))?; let reader = BufReader::new(file); if filename.ends_with(".tar.gz") || filename.ends_with(".tgz") { tar::Archive::new(flate2::read::GzDecoder::new(reader)) .unpack(temp_dir.path()) .context("Failed to unpack OCI bundle")?; } else if filename.ends_with(".tar.xz") || filename.ends_with(".txz") { tar::Archive::new(xz2::read::XzDecoder::new(reader)) .unpack(temp_dir.path()) .context("Failed to unpack OCI bundle")?; } else if filename.ends_with(".tar.zst") || filename.ends_with(".tar.zstd") { tar::Archive::new(zstd::stream::read::Decoder::new(reader)?) .unpack(temp_dir.path()) .context("Failed to unpack OCI bundle")?; } else { tar::Archive::new(reader) .unpack(temp_dir.path()) .context("Failed to unpack OCI bundle")?; } // Read the layers manifest let manifest = std::fs::read_to_string(temp_dir.path().join("layers.manifest")) .context("Failed to read layers.manifest")?; // Apply each layer in order with full whiteout handling. for layer_name in manifest.lines() { let layer_path = temp_dir.path().join(layer_name); if !layer_path.exists() { continue; } veprintln!("Extracting layer: {}", layer_name); extract_oci_layer(&layer_path, dest)?; } Ok(()) } /// Decompress and extract one OCI layer tarball into `dest`, honouring /// whiteout markers. Compression is inferred from the filename then from /// magic bytes. fn extract_oci_layer(layer_path: &Path, dest: &Path) -> Result<()> { use std::io::Read; let layer_name = layer_path .file_name() .and_then(|n| n.to_str()) .unwrap_or(""); let file = File::open(layer_path) .with_context(|| format!("Failed to open layer: {}", layer_path.display()))?; let reader = BufReader::new(file); if layer_name.ends_with(".tar.gz") || layer_name.ends_with(".tgz") { extract_archive_with_whiteouts( tar::Archive::new(flate2::read::GzDecoder::new(reader)), dest, ) } else if layer_name.ends_with(".tar.xz") || layer_name.ends_with(".txz") { extract_archive_with_whiteouts(tar::Archive::new(xz2::read::XzDecoder::new(reader)), dest) } else if layer_name.ends_with(".tar.zst") || layer_name.ends_with(".tar.zstd") { extract_archive_with_whiteouts( tar::Archive::new(zstd::stream::read::Decoder::new(reader)?), dest, ) } else { // Fall back to magic-byte detection let mut magic = [0u8; 6]; let mut peek = BufReader::new(File::open(layer_path)?); let _ = peek.read_exact(&mut magic); // short reads are fine for detection drop(peek); match magic { [0x1f, 0x8b, ..] => extract_archive_with_whiteouts( tar::Archive::new(flate2::read::GzDecoder::new(BufReader::new(File::open( layer_path, )?))), dest, ), [0xfd, b'7', b'z', b'X', b'Z', 0x00] => extract_archive_with_whiteouts( tar::Archive::new(xz2::read::XzDecoder::new(BufReader::new(File::open( layer_path, )?))), dest, ), [0x28, 0xb5, 0x2f, 0xfd, ..] => extract_archive_with_whiteouts( tar::Archive::new(zstd::stream::read::Decoder::new(BufReader::new( File::open(layer_path)?, ))?), dest, ), _ => extract_archive_with_whiteouts( tar::Archive::new(BufReader::new(File::open(layer_path)?)), dest, ), } } } /// Apply one OCI layer archive to `dest`, interpreting Docker whiteout markers: /// /// - `.wh.` — Delete `` from a lower layer that was already /// extracted into `dest`. /// - `.wh..wh..opq` — Opaque whiteout: the directory that contains this entry /// is new in this layer; delete everything already in that /// directory from lower layers before applying new content. /// /// All other entries are extracted normally via `Entry::unpack_in`. fn extract_archive_with_whiteouts( mut archive: tar::Archive, dest: &Path, ) -> Result<()> { archive.set_preserve_permissions(true); archive.set_preserve_ownerships(false); archive.set_unpack_xattrs(false); for entry in archive.entries().context("Failed to iterate tar entries")? { let mut entry = entry.context("Failed to read tar entry")?; // Clone the path before any mutable borrow of entry (needed for unpack_in) let path = entry.path().context("Invalid tar entry path")?.into_owned(); let filename = path .file_name() .map(|n| n.to_string_lossy().into_owned()) .unwrap_or_default(); if filename == ".wh..wh..opq" { // Opaque whiteout: clear all previously-extracted content in the // parent directory so only this layer's content is visible. let parent = path.parent().unwrap_or(Path::new("")); let dest_dir = dest.join(parent); if dest_dir.symlink_metadata().is_ok() { for child in std::fs::read_dir(&dest_dir) .with_context(|| format!("Failed to read {}", dest_dir.display()))? { let child = child?; let child_path = child.path(); remove_path(&child_path).with_context(|| { format!("Opaque whiteout: failed to remove {}", child_path.display()) })?; } } // Do not extract the .wh..wh..opq marker itself. } else if let Some(real_name) = filename.strip_prefix(".wh.") { // Regular whiteout: delete the named path from lower layers. let parent = path.parent().unwrap_or(Path::new("")); let target = dest.join(parent).join(real_name); // symlink_metadata (lstat) does not follow symlinks, so a dangling // symlink is correctly detected and removed rather than silently skipped. if target.symlink_metadata().is_ok() { remove_path(&target) .with_context(|| format!("Whiteout: failed to remove {}", target.display()))?; } // Do not extract the .wh.* marker itself. } else { entry .unpack_in(dest) .with_context(|| format!("Failed to extract {}", path.display()))?; } } Ok(()) } /// Remove a path: uses remove_dir_all for real directories, remove_file for /// everything else (regular files, symlinks — including symlinks-to-dirs). fn remove_path(path: &Path) -> std::io::Result<()> { // symlink_metadata does not follow symlinks, so a symlink-to-dir correctly // reports file_type().is_symlink() rather than is_dir(). let meta = std::fs::symlink_metadata(path)?; if meta.is_dir() { std::fs::remove_dir_all(path) } else { std::fs::remove_file(path) } } fn extract_gz(reader: R, dest: &Path) -> Result<()> { let gz_decoder = flate2::read::GzDecoder::new(reader); let mut archive = tar::Archive::new(gz_decoder); archive.set_preserve_permissions(true); archive.set_preserve_ownerships(false); archive.set_unpack_xattrs(false); archive .unpack(dest) .context("Failed to extract gzip archive")?; Ok(()) } fn extract_xz(reader: R, dest: &Path) -> Result<()> { let xz_decoder = xz2::read::XzDecoder::new(reader); let mut archive = tar::Archive::new(xz_decoder); archive.set_preserve_permissions(true); archive.set_preserve_ownerships(false); archive.set_unpack_xattrs(false); archive .unpack(dest) .context("Failed to extract xz archive")?; Ok(()) } fn extract_zst(reader: R, dest: &Path) -> Result<()> { let zst_decoder = zstd::Decoder::new(reader)?; let mut archive = tar::Archive::new(zst_decoder); archive.set_preserve_permissions(true); archive.set_preserve_ownerships(false); archive.set_unpack_xattrs(false); archive .unpack(dest) .context("Failed to extract zstd archive")?; Ok(()) } fn extract_tar(reader: R, dest: &Path) -> Result<()> { let mut archive = tar::Archive::new(reader); archive.set_preserve_permissions(true); archive.set_preserve_ownerships(false); archive.set_unpack_xattrs(false); archive .unpack(dest) .context("Failed to extract tar archive")?; Ok(()) }