feat: basic functionality

This commit is contained in:
2026-06-09 19:09:57 +02:00
parent a3339f9d34
commit 04ed1f25ae
15 changed files with 5374 additions and 63 deletions
+312
View File
@@ -0,0 +1,312 @@
use crate::veprintln;
use anyhow::{Context, Result};
use std::fs::File;
use std::io::{BufReader, Read};
use std::path::Path;
/// Extract a tarball to the specified directory
pub fn extract_tarball(tarball: &Path, dest: &Path) -> Result<()> {
let filename = tarball.file_name().and_then(|n| n.to_str()).unwrap_or("");
// OCI bundle files are always named "oci-…" by generate_cache_filename.
// Use the filename prefix as a zero-I/O dispatch signal instead of
// scanning the archive for a layers.manifest entry (which required
// reading the entire compressed archive twice for large OCI images).
if filename.starts_with("oci-") {
veprintln!("Detected multi-layer OCI image, extracting layers...");
return extract_multi_layer_oci(tarball, dest);
}
let file = File::open(tarball)
.with_context(|| format!("Failed to open tarball: {}", tarball.display()))?;
let reader = BufReader::new(file);
// Detect compression format from filename
if filename.ends_with(".tar.gz") || filename.ends_with(".tgz") {
extract_gz(reader, dest)?;
} else if filename.ends_with(".tar.xz") || filename.ends_with(".txz") {
extract_xz(reader, dest)?;
} else if filename.ends_with(".tar.zst") || filename.ends_with(".tar.zstd") {
extract_zst(reader, dest)?;
} else if filename.ends_with(".tar") {
extract_tar(reader, dest)?;
} else {
// Try to detect from magic bytes
let mut magic = [0u8; 6];
let mut peek_reader = BufReader::new(File::open(tarball)?);
peek_reader.read_exact(&mut magic)?;
match magic {
[0x1f, 0x8b, ..] => {
// gzip magic
drop(peek_reader);
let file = File::open(tarball)?;
extract_gz(BufReader::new(file), dest)?;
}
[0xfd, b'7', b'z', b'X', b'Z', 0x00] => {
// xz magic
drop(peek_reader);
let file = File::open(tarball)?;
extract_xz(BufReader::new(file), dest)?;
}
[0x28, 0xb5, 0x2f, 0xfd, ..] => {
// zstd magic
drop(peek_reader);
let file = File::open(tarball)?;
extract_zst(BufReader::new(file), dest)?;
}
_ => {
// Assume uncompressed tar
drop(peek_reader);
let file = File::open(tarball)?;
extract_tar(BufReader::new(file), dest)?;
}
}
}
Ok(())
}
/// Extract a multi-layer OCI image bundle into `dest`.
///
/// The bundle is first unpacked into a temporary directory (compression is
/// chosen from the bundle's file-name suffix, defaulting to plain tar). The
/// `layers.manifest` file inside it is then read line by line and each listed
/// layer is applied to `dest` in manifest order via `extract_oci_layer`.
///
/// Blank manifest lines are ignored. Layers named in the manifest but absent
/// from the bundle are skipped (and reported through `veprintln!`).
///
/// # Errors
/// Returns an error if the temporary directory cannot be created, the bundle
/// cannot be opened or unpacked, the manifest cannot be read, or any layer
/// fails to extract.
fn extract_multi_layer_oci(tarball: &Path, dest: &Path) -> Result<()> {
    let filename = tarball.file_name().and_then(|n| n.to_str()).unwrap_or("");

    // Extract the outer OCI bundle (layers.manifest + layer_N.tar.gz files)
    // into a temp directory. This is our own format, not an OCI layer, so
    // plain unpack is fine here.
    let temp_dir = tempfile::tempdir().context("Failed to create temp directory for OCI layers")?;
    let file = File::open(tarball)
        .with_context(|| format!("Failed to open OCI tarball: {}", tarball.display()))?;
    let reader = BufReader::new(file);

    // Pick the decompressor once; the unpack call is shared by every format.
    let decoder: Box<dyn std::io::Read> =
        if filename.ends_with(".tar.gz") || filename.ends_with(".tgz") {
            Box::new(flate2::read::GzDecoder::new(reader))
        } else if filename.ends_with(".tar.xz") || filename.ends_with(".txz") {
            Box::new(xz2::read::XzDecoder::new(reader))
        } else if filename.ends_with(".tar.zst") || filename.ends_with(".tar.zstd") {
            Box::new(
                zstd::stream::read::Decoder::new(reader)
                    .context("Failed to initialise zstd decoder for OCI bundle")?,
            )
        } else {
            Box::new(reader)
        };
    tar::Archive::new(decoder)
        .unpack(temp_dir.path())
        .context("Failed to unpack OCI bundle")?;

    // Read the layers manifest.
    let manifest = std::fs::read_to_string(temp_dir.path().join("layers.manifest"))
        .context("Failed to read layers.manifest")?;

    // Apply each layer in order with full whiteout handling. Blank lines are
    // dropped first: joining an empty name yields the temp directory itself,
    // which exists and would otherwise be handed to the layer extractor.
    let layer_names = manifest
        .lines()
        .map(str::trim)
        .filter(|name| !name.is_empty());
    for layer_name in layer_names {
        let layer_path = temp_dir.path().join(layer_name);
        if !layer_path.exists() {
            veprintln!("Skipping layer missing from bundle: {}", layer_name);
            continue;
        }
        veprintln!("Extracting layer: {}", layer_name);
        extract_oci_layer(&layer_path, dest)?;
    }
    Ok(())
}
/// Decompress and extract one OCI layer tarball into `dest`, honouring
/// whiteout markers. Compression is inferred from the filename then from
/// magic bytes.
fn extract_oci_layer(layer_path: &Path, dest: &Path) -> Result<()> {
use std::io::Read;
let layer_name = layer_path
.file_name()
.and_then(|n| n.to_str())
.unwrap_or("");
let file = File::open(layer_path)
.with_context(|| format!("Failed to open layer: {}", layer_path.display()))?;
let reader = BufReader::new(file);
if layer_name.ends_with(".tar.gz") || layer_name.ends_with(".tgz") {
extract_archive_with_whiteouts(
tar::Archive::new(flate2::read::GzDecoder::new(reader)),
dest,
)
} else if layer_name.ends_with(".tar.xz") || layer_name.ends_with(".txz") {
extract_archive_with_whiteouts(tar::Archive::new(xz2::read::XzDecoder::new(reader)), dest)
} else if layer_name.ends_with(".tar.zst") || layer_name.ends_with(".tar.zstd") {
extract_archive_with_whiteouts(
tar::Archive::new(zstd::stream::read::Decoder::new(reader)?),
dest,
)
} else {
// Fall back to magic-byte detection
let mut magic = [0u8; 6];
let mut peek = BufReader::new(File::open(layer_path)?);
let _ = peek.read_exact(&mut magic); // short reads are fine for detection
drop(peek);
match magic {
[0x1f, 0x8b, ..] => extract_archive_with_whiteouts(
tar::Archive::new(flate2::read::GzDecoder::new(BufReader::new(File::open(
layer_path,
)?))),
dest,
),
[0xfd, b'7', b'z', b'X', b'Z', 0x00] => extract_archive_with_whiteouts(
tar::Archive::new(xz2::read::XzDecoder::new(BufReader::new(File::open(
layer_path,
)?))),
dest,
),
[0x28, 0xb5, 0x2f, 0xfd, ..] => extract_archive_with_whiteouts(
tar::Archive::new(zstd::stream::read::Decoder::new(BufReader::new(
File::open(layer_path)?,
))?),
dest,
),
_ => extract_archive_with_whiteouts(
tar::Archive::new(BufReader::new(File::open(layer_path)?)),
dest,
),
}
}
}
/// Apply one OCI layer archive to `dest`, interpreting Docker whiteout markers:
///
/// - `.wh.<name>` — Delete `<name>` from a lower layer that was already
/// extracted into `dest`.
/// - `.wh..wh..opq` — Opaque whiteout: the directory that contains this entry
/// is new in this layer; delete everything already in that
/// directory from lower layers before applying new content.
///
/// All other entries are extracted normally via `Entry::unpack_in`.
fn extract_archive_with_whiteouts<R: std::io::Read>(
mut archive: tar::Archive<R>,
dest: &Path,
) -> Result<()> {
archive.set_preserve_permissions(true);
archive.set_preserve_ownerships(false);
archive.set_unpack_xattrs(false);
for entry in archive.entries().context("Failed to iterate tar entries")? {
let mut entry = entry.context("Failed to read tar entry")?;
// Clone the path before any mutable borrow of entry (needed for unpack_in)
let path = entry.path().context("Invalid tar entry path")?.into_owned();
let filename = path
.file_name()
.map(|n| n.to_string_lossy().into_owned())
.unwrap_or_default();
if filename == ".wh..wh..opq" {
// Opaque whiteout: clear all previously-extracted content in the
// parent directory so only this layer's content is visible.
let parent = path.parent().unwrap_or(Path::new(""));
let dest_dir = dest.join(parent);
if dest_dir.symlink_metadata().is_ok() {
for child in std::fs::read_dir(&dest_dir)
.with_context(|| format!("Failed to read {}", dest_dir.display()))?
{
let child = child?;
let child_path = child.path();
remove_path(&child_path).with_context(|| {
format!("Opaque whiteout: failed to remove {}", child_path.display())
})?;
}
}
// Do not extract the .wh..wh..opq marker itself.
} else if let Some(real_name) = filename.strip_prefix(".wh.") {
// Regular whiteout: delete the named path from lower layers.
let parent = path.parent().unwrap_or(Path::new(""));
let target = dest.join(parent).join(real_name);
// symlink_metadata (lstat) does not follow symlinks, so a dangling
// symlink is correctly detected and removed rather than silently skipped.
if target.symlink_metadata().is_ok() {
remove_path(&target)
.with_context(|| format!("Whiteout: failed to remove {}", target.display()))?;
}
// Do not extract the .wh.* marker itself.
} else {
entry
.unpack_in(dest)
.with_context(|| format!("Failed to extract {}", path.display()))?;
}
}
Ok(())
}
/// Delete whatever lives at `path`.
///
/// Real directories are removed recursively; every other kind of entry
/// (regular file, symlink, symlink pointing at a directory) is unlinked as a
/// file.
///
/// # Errors
/// Propagates the I/O error from the metadata lookup (for example when the
/// path does not exist) or from the removal itself.
fn remove_path(path: &Path) -> std::io::Result<()> {
    // lstat semantics: the link itself is inspected, never its target, so a
    // symlink-to-directory is not classified as a directory here.
    match std::fs::symlink_metadata(path)?.file_type() {
        kind if kind.is_dir() => std::fs::remove_dir_all(path),
        _ => std::fs::remove_file(path),
    }
}
/// Unpack a gzip-compressed tar stream from `reader` into `dest`.
///
/// File permissions are preserved; ownership and extended attributes are not.
fn extract_gz<R>(reader: R, dest: &Path) -> Result<()>
where
    R: std::io::Read,
{
    let mut tarball = tar::Archive::new(flate2::read::GzDecoder::new(reader));
    tarball.set_unpack_xattrs(false);
    tarball.set_preserve_ownerships(false);
    tarball.set_preserve_permissions(true);
    tarball
        .unpack(dest)
        .context("Failed to extract gzip archive")
}
/// Unpack an xz-compressed tar stream from `reader` into `dest`.
///
/// File permissions are preserved; ownership and extended attributes are not.
fn extract_xz<R>(reader: R, dest: &Path) -> Result<()>
where
    R: std::io::Read,
{
    let mut tarball = tar::Archive::new(xz2::read::XzDecoder::new(reader));
    tarball.set_unpack_xattrs(false);
    tarball.set_preserve_ownerships(false);
    tarball.set_preserve_permissions(true);
    tarball.unpack(dest).context("Failed to extract xz archive")
}
/// Unpack a zstd-compressed tar stream from `reader` into `dest`.
///
/// File permissions are preserved; ownership and extended attributes are not.
/// Fails early if the zstd decoder cannot be constructed.
fn extract_zst<R>(reader: R, dest: &Path) -> Result<()>
where
    R: std::io::Read,
{
    let mut tarball = tar::Archive::new(zstd::Decoder::new(reader)?);
    tarball.set_unpack_xattrs(false);
    tarball.set_preserve_ownerships(false);
    tarball.set_preserve_permissions(true);
    tarball
        .unpack(dest)
        .context("Failed to extract zstd archive")
}
/// Unpack an uncompressed tar stream from `reader` into `dest`.
///
/// File permissions are preserved; ownership and extended attributes are not.
fn extract_tar<R>(reader: R, dest: &Path) -> Result<()>
where
    R: std::io::Read,
{
    let mut tarball = tar::Archive::new(reader);
    tarball.set_unpack_xattrs(false);
    tarball.set_preserve_ownerships(false);
    tarball.set_preserve_permissions(true);
    tarball.unpack(dest).context("Failed to extract tar archive")
}