313 lines
12 KiB
Rust
313 lines
12 KiB
Rust
use crate::veprintln;
|
|
use anyhow::{Context, Result};
|
|
use std::fs::File;
|
|
use std::io::{BufReader, Read};
|
|
use std::path::Path;
|
|
|
|
/// Extract a tarball to the specified directory
|
|
pub fn extract_tarball(tarball: &Path, dest: &Path) -> Result<()> {
|
|
let filename = tarball.file_name().and_then(|n| n.to_str()).unwrap_or("");
|
|
|
|
// OCI bundle files are always named "oci-…" by generate_cache_filename.
|
|
// Use the filename prefix as a zero-I/O dispatch signal instead of
|
|
// scanning the archive for a layers.manifest entry (which required
|
|
// reading the entire compressed archive twice for large OCI images).
|
|
if filename.starts_with("oci-") {
|
|
veprintln!("Detected multi-layer OCI image, extracting layers...");
|
|
return extract_multi_layer_oci(tarball, dest);
|
|
}
|
|
|
|
let file = File::open(tarball)
|
|
.with_context(|| format!("Failed to open tarball: {}", tarball.display()))?;
|
|
|
|
let reader = BufReader::new(file);
|
|
|
|
// Detect compression format from filename
|
|
if filename.ends_with(".tar.gz") || filename.ends_with(".tgz") {
|
|
extract_gz(reader, dest)?;
|
|
} else if filename.ends_with(".tar.xz") || filename.ends_with(".txz") {
|
|
extract_xz(reader, dest)?;
|
|
} else if filename.ends_with(".tar.zst") || filename.ends_with(".tar.zstd") {
|
|
extract_zst(reader, dest)?;
|
|
} else if filename.ends_with(".tar") {
|
|
extract_tar(reader, dest)?;
|
|
} else {
|
|
// Try to detect from magic bytes
|
|
let mut magic = [0u8; 6];
|
|
let mut peek_reader = BufReader::new(File::open(tarball)?);
|
|
peek_reader.read_exact(&mut magic)?;
|
|
|
|
match magic {
|
|
[0x1f, 0x8b, ..] => {
|
|
// gzip magic
|
|
drop(peek_reader);
|
|
let file = File::open(tarball)?;
|
|
extract_gz(BufReader::new(file), dest)?;
|
|
}
|
|
[0xfd, b'7', b'z', b'X', b'Z', 0x00] => {
|
|
// xz magic
|
|
drop(peek_reader);
|
|
let file = File::open(tarball)?;
|
|
extract_xz(BufReader::new(file), dest)?;
|
|
}
|
|
[0x28, 0xb5, 0x2f, 0xfd, ..] => {
|
|
// zstd magic
|
|
drop(peek_reader);
|
|
let file = File::open(tarball)?;
|
|
extract_zst(BufReader::new(file), dest)?;
|
|
}
|
|
_ => {
|
|
// Assume uncompressed tar
|
|
drop(peek_reader);
|
|
let file = File::open(tarball)?;
|
|
extract_tar(BufReader::new(file), dest)?;
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Extract a multi-layer OCI image
|
|
fn extract_multi_layer_oci(tarball: &Path, dest: &Path) -> Result<()> {
|
|
let filename = tarball.file_name().and_then(|n| n.to_str()).unwrap_or("");
|
|
|
|
// Extract the outer OCI bundle (layers.manifest + layer_N.tar.gz files)
|
|
// into a temp directory. This is our own format, not an OCI layer, so
|
|
// plain unpack is fine here.
|
|
let temp_dir = tempfile::tempdir().context("Failed to create temp directory for OCI layers")?;
|
|
|
|
let file = File::open(tarball)
|
|
.with_context(|| format!("Failed to open OCI tarball: {}", tarball.display()))?;
|
|
let reader = BufReader::new(file);
|
|
|
|
if filename.ends_with(".tar.gz") || filename.ends_with(".tgz") {
|
|
tar::Archive::new(flate2::read::GzDecoder::new(reader))
|
|
.unpack(temp_dir.path())
|
|
.context("Failed to unpack OCI bundle")?;
|
|
} else if filename.ends_with(".tar.xz") || filename.ends_with(".txz") {
|
|
tar::Archive::new(xz2::read::XzDecoder::new(reader))
|
|
.unpack(temp_dir.path())
|
|
.context("Failed to unpack OCI bundle")?;
|
|
} else if filename.ends_with(".tar.zst") || filename.ends_with(".tar.zstd") {
|
|
tar::Archive::new(zstd::stream::read::Decoder::new(reader)?)
|
|
.unpack(temp_dir.path())
|
|
.context("Failed to unpack OCI bundle")?;
|
|
} else {
|
|
tar::Archive::new(reader)
|
|
.unpack(temp_dir.path())
|
|
.context("Failed to unpack OCI bundle")?;
|
|
}
|
|
|
|
// Read the layers manifest
|
|
let manifest = std::fs::read_to_string(temp_dir.path().join("layers.manifest"))
|
|
.context("Failed to read layers.manifest")?;
|
|
|
|
// Apply each layer in order with full whiteout handling.
|
|
for layer_name in manifest.lines() {
|
|
let layer_path = temp_dir.path().join(layer_name);
|
|
if !layer_path.exists() {
|
|
continue;
|
|
}
|
|
veprintln!("Extracting layer: {}", layer_name);
|
|
extract_oci_layer(&layer_path, dest)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Decompress and extract one OCI layer tarball into `dest`, honouring
|
|
/// whiteout markers. Compression is inferred from the filename then from
|
|
/// magic bytes.
|
|
fn extract_oci_layer(layer_path: &Path, dest: &Path) -> Result<()> {
|
|
use std::io::Read;
|
|
let layer_name = layer_path
|
|
.file_name()
|
|
.and_then(|n| n.to_str())
|
|
.unwrap_or("");
|
|
let file = File::open(layer_path)
|
|
.with_context(|| format!("Failed to open layer: {}", layer_path.display()))?;
|
|
let reader = BufReader::new(file);
|
|
|
|
if layer_name.ends_with(".tar.gz") || layer_name.ends_with(".tgz") {
|
|
extract_archive_with_whiteouts(
|
|
tar::Archive::new(flate2::read::GzDecoder::new(reader)),
|
|
dest,
|
|
)
|
|
} else if layer_name.ends_with(".tar.xz") || layer_name.ends_with(".txz") {
|
|
extract_archive_with_whiteouts(tar::Archive::new(xz2::read::XzDecoder::new(reader)), dest)
|
|
} else if layer_name.ends_with(".tar.zst") || layer_name.ends_with(".tar.zstd") {
|
|
extract_archive_with_whiteouts(
|
|
tar::Archive::new(zstd::stream::read::Decoder::new(reader)?),
|
|
dest,
|
|
)
|
|
} else {
|
|
// Fall back to magic-byte detection
|
|
let mut magic = [0u8; 6];
|
|
let mut peek = BufReader::new(File::open(layer_path)?);
|
|
let _ = peek.read_exact(&mut magic); // short reads are fine for detection
|
|
drop(peek);
|
|
match magic {
|
|
[0x1f, 0x8b, ..] => extract_archive_with_whiteouts(
|
|
tar::Archive::new(flate2::read::GzDecoder::new(BufReader::new(File::open(
|
|
layer_path,
|
|
)?))),
|
|
dest,
|
|
),
|
|
[0xfd, b'7', b'z', b'X', b'Z', 0x00] => extract_archive_with_whiteouts(
|
|
tar::Archive::new(xz2::read::XzDecoder::new(BufReader::new(File::open(
|
|
layer_path,
|
|
)?))),
|
|
dest,
|
|
),
|
|
[0x28, 0xb5, 0x2f, 0xfd, ..] => extract_archive_with_whiteouts(
|
|
tar::Archive::new(zstd::stream::read::Decoder::new(BufReader::new(
|
|
File::open(layer_path)?,
|
|
))?),
|
|
dest,
|
|
),
|
|
_ => extract_archive_with_whiteouts(
|
|
tar::Archive::new(BufReader::new(File::open(layer_path)?)),
|
|
dest,
|
|
),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Apply one OCI layer archive to `dest`, interpreting Docker whiteout markers:
|
|
///
|
|
/// - `.wh.<name>` — Delete `<name>` from a lower layer that was already
|
|
/// extracted into `dest`.
|
|
/// - `.wh..wh..opq` — Opaque whiteout: the directory that contains this entry
|
|
/// is new in this layer; delete everything already in that
|
|
/// directory from lower layers before applying new content.
|
|
///
|
|
/// All other entries are extracted normally via `Entry::unpack_in`.
|
|
fn extract_archive_with_whiteouts<R: std::io::Read>(
|
|
mut archive: tar::Archive<R>,
|
|
dest: &Path,
|
|
) -> Result<()> {
|
|
archive.set_preserve_permissions(true);
|
|
archive.set_preserve_ownerships(false);
|
|
archive.set_unpack_xattrs(false);
|
|
|
|
for entry in archive.entries().context("Failed to iterate tar entries")? {
|
|
let mut entry = entry.context("Failed to read tar entry")?;
|
|
|
|
// Clone the path before any mutable borrow of entry (needed for unpack_in)
|
|
let path = entry.path().context("Invalid tar entry path")?.into_owned();
|
|
|
|
let filename = path
|
|
.file_name()
|
|
.map(|n| n.to_string_lossy().into_owned())
|
|
.unwrap_or_default();
|
|
|
|
if filename == ".wh..wh..opq" {
|
|
// Opaque whiteout: clear all previously-extracted content in the
|
|
// parent directory so only this layer's content is visible.
|
|
let parent = path.parent().unwrap_or(Path::new(""));
|
|
let dest_dir = dest.join(parent);
|
|
if dest_dir.symlink_metadata().is_ok() {
|
|
for child in std::fs::read_dir(&dest_dir)
|
|
.with_context(|| format!("Failed to read {}", dest_dir.display()))?
|
|
{
|
|
let child = child?;
|
|
let child_path = child.path();
|
|
remove_path(&child_path).with_context(|| {
|
|
format!("Opaque whiteout: failed to remove {}", child_path.display())
|
|
})?;
|
|
}
|
|
}
|
|
// Do not extract the .wh..wh..opq marker itself.
|
|
} else if let Some(real_name) = filename.strip_prefix(".wh.") {
|
|
// Regular whiteout: delete the named path from lower layers.
|
|
let parent = path.parent().unwrap_or(Path::new(""));
|
|
let target = dest.join(parent).join(real_name);
|
|
// symlink_metadata (lstat) does not follow symlinks, so a dangling
|
|
// symlink is correctly detected and removed rather than silently skipped.
|
|
if target.symlink_metadata().is_ok() {
|
|
remove_path(&target)
|
|
.with_context(|| format!("Whiteout: failed to remove {}", target.display()))?;
|
|
}
|
|
// Do not extract the .wh.* marker itself.
|
|
} else {
|
|
entry
|
|
.unpack_in(dest)
|
|
.with_context(|| format!("Failed to extract {}", path.display()))?;
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
/// Delete whatever lives at `path`.
///
/// A real directory is removed recursively with `remove_dir_all`; anything
/// else (regular file, symlink — including a symlink that points at a
/// directory) is removed with `remove_file`.
///
/// # Errors
///
/// Propagates the I/O error from the `lstat` call (e.g. when `path` does not
/// exist) or from the removal itself.
fn remove_path(path: &Path) -> std::io::Result<()> {
    // lstat semantics: the type reported is that of the link itself, never of
    // its target, so a symlink-to-dir is not mistaken for a directory.
    let kind = std::fs::symlink_metadata(path)?.file_type();
    if kind.is_dir() {
        return std::fs::remove_dir_all(path);
    }
    std::fs::remove_file(path)
}
|
|
|
|
fn extract_gz<R: std::io::Read>(reader: R, dest: &Path) -> Result<()> {
|
|
let gz_decoder = flate2::read::GzDecoder::new(reader);
|
|
let mut archive = tar::Archive::new(gz_decoder);
|
|
|
|
archive.set_preserve_permissions(true);
|
|
archive.set_preserve_ownerships(false);
|
|
archive.set_unpack_xattrs(false);
|
|
|
|
archive
|
|
.unpack(dest)
|
|
.context("Failed to extract gzip archive")?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn extract_xz<R: std::io::Read>(reader: R, dest: &Path) -> Result<()> {
|
|
let xz_decoder = xz2::read::XzDecoder::new(reader);
|
|
let mut archive = tar::Archive::new(xz_decoder);
|
|
|
|
archive.set_preserve_permissions(true);
|
|
archive.set_preserve_ownerships(false);
|
|
archive.set_unpack_xattrs(false);
|
|
|
|
archive
|
|
.unpack(dest)
|
|
.context("Failed to extract xz archive")?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn extract_zst<R: std::io::Read>(reader: R, dest: &Path) -> Result<()> {
|
|
let zst_decoder = zstd::Decoder::new(reader)?;
|
|
let mut archive = tar::Archive::new(zst_decoder);
|
|
|
|
archive.set_preserve_permissions(true);
|
|
archive.set_preserve_ownerships(false);
|
|
archive.set_unpack_xattrs(false);
|
|
|
|
archive
|
|
.unpack(dest)
|
|
.context("Failed to extract zstd archive")?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn extract_tar<R: std::io::Read>(reader: R, dest: &Path) -> Result<()> {
|
|
let mut archive = tar::Archive::new(reader);
|
|
|
|
archive.set_preserve_permissions(true);
|
|
archive.set_preserve_ownerships(false);
|
|
archive.set_unpack_xattrs(false);
|
|
|
|
archive
|
|
.unpack(dest)
|
|
.context("Failed to extract tar archive")?;
|
|
|
|
Ok(())
|
|
}
|