diff --git a/src/qemu_vm.rs b/src/qemu_vm.rs index dd734d3..ec4d1de 100644 --- a/src/qemu_vm.rs +++ b/src/qemu_vm.rs @@ -204,8 +204,11 @@ fn create_initramfs(rootfs: &PathBuf) -> Result { fn create_cpio_archive(rootfs: &Path, pb: &ProgressBar) -> Result> { let mut archive = Vec::new(); + // Track seen inodes to handle hard links properly + let mut seen_inodes = std::collections::HashMap::new(); + // Collect all entries with their data - let entries = collect_entries(rootfs, rootfs, pb)?; + let entries = collect_entries(rootfs, rootfs, pb, &mut seen_inodes)?; // Collect entry names for checking existence later let entry_names: Vec<&str> = entries.iter().map(|(n, _, _, _, _)| n.as_str()).collect(); @@ -275,8 +278,15 @@ fn create_cpio_archive(rootfs: &Path, pb: &ProgressBar) -> Result> { } /// Collect all filesystem entries recursively -fn collect_entries(base: &Path, current: &Path, pb: &ProgressBar) -> Result)>> { +/// Uses a HashMap to track hard links by (device, inode) - only stores data for first occurrence +fn collect_entries( + base: &Path, + current: &Path, + pb: &ProgressBar, + seen_inodes: &mut std::collections::HashMap<(u64, u64), String>, +) -> Result)>> { let mut entries = Vec::new(); + let mut total_data: u64 = 0; // Read directory entries let dir_entries: Vec<_> = match std::fs::read_dir(current) { @@ -317,42 +327,69 @@ fn collect_entries(base: &Path, current: &Path, pb: &ProgressBar) -> Result = if file_type.is_file() { - match std::fs::read(&path) { + // Build the entry name (relative path from base) + let relative = path.strip_prefix(base).unwrap(); + let entry_name = relative.to_string_lossy().into_owned(); + + // Handle hard links: only store data for first occurrence + let (data, nlink) = if file_type.is_file() && metadata.nlink() > 1 { + // This file has multiple hard links - check if we've seen it before + let inode_key = (metadata.dev(), metadata.ino()); + + if let Some(_first_path) = seen_inodes.get(&inode_key) { + // We've seen this inode before - create a hard link entry with no data + (Vec::new(), metadata.nlink() as u32) + } else { + // First occurrence - read the data and record this inode + seen_inodes.insert(inode_key, entry_name.clone()); + let data = match std::fs::read(&path) { + Ok(data) => data, + Err(e) => { + veprintln!("Warning: cannot read file {}: {}", path.display(), e); + continue; + } + }; + (data, metadata.nlink() as u32) + } + } else if file_type.is_file() { + // Regular file with nlink=1 + let data = match std::fs::read(&path) { Ok(data) => data, Err(e) => { veprintln!("Warning: cannot read file {}: {}", path.display(), e); continue; } - } + }; + (data, 1) } else if file_type.is_symlink() { match std::fs::read_link(&path) { - Ok(target) => target.to_string_lossy().into_owned().into_bytes(), + Ok(target) => (target.to_string_lossy().into_owned().into_bytes(), 1), Err(e) => { veprintln!("Warning: cannot read symlink {}: {}", path.display(), e); continue; } } } else { - Vec::new() + // Directory + (Vec::new(), 2) }; - // Build the entry name (relative path from base) - let relative = path.strip_prefix(base).unwrap(); - let entry_name = relative.to_string_lossy().into_owned(); - - // nlink: directories have 2 (. and ..), files/symlinks have 1 - let nlink = if file_type.is_dir() { 2 } else { 1 }; - + total_data += data.len() as u64; entries.push((entry_name, mode, metadata.mtime() as u32, nlink, data)); // Recurse into directories if file_type.is_dir() { - let mut sub_entries = collect_entries(base, &path, pb)?; + let mut sub_entries = collect_entries(base, &path, pb, seen_inodes)?; + for (_, _, _, _, d) in &sub_entries { + total_data += d.len() as u64; + } entries.append(&mut sub_entries); } } + // Only print summary for the root directory + if current == base { + veprintln!("Collected {} entries, {} bytes total data", entries.len(), total_data); + } Ok(entries) }