fix: handle hard links in initramfs creation
- Track inodes to avoid duplicating data for hard-linked files
- Use fast compression level for faster initramfs creation
This commit is contained in:
+52
-15
@@ -204,8 +204,11 @@ fn create_initramfs(rootfs: &PathBuf) -> Result<PathBuf> {
|
||||
fn create_cpio_archive(rootfs: &Path, pb: &ProgressBar) -> Result<Vec<u8>> {
|
||||
let mut archive = Vec::new();
|
||||
|
||||
// Track seen inodes to handle hard links properly
|
||||
let mut seen_inodes = std::collections::HashMap::new();
|
||||
|
||||
// Collect all entries with their data
|
||||
let entries = collect_entries(rootfs, rootfs, pb)?;
|
||||
let entries = collect_entries(rootfs, rootfs, pb, &mut seen_inodes)?;
|
||||
|
||||
// Collect entry names for checking existence later
|
||||
let entry_names: Vec<&str> = entries.iter().map(|(n, _, _, _, _)| n.as_str()).collect();
|
||||
@@ -275,8 +278,15 @@ fn create_cpio_archive(rootfs: &Path, pb: &ProgressBar) -> Result<Vec<u8>> {
|
||||
}
|
||||
|
||||
/// Collect all filesystem entries recursively
|
||||
fn collect_entries(base: &Path, current: &Path, pb: &ProgressBar) -> Result<Vec<(String, u32, u32, u32, Vec<u8>)>> {
|
||||
/// Uses a HashMap to track hard links by (device, inode) - only stores data for first occurrence
|
||||
fn collect_entries(
|
||||
base: &Path,
|
||||
current: &Path,
|
||||
pb: &ProgressBar,
|
||||
seen_inodes: &mut std::collections::HashMap<(u64, u64), String>,
|
||||
) -> Result<Vec<(String, u32, u32, u32, Vec<u8>)>> {
|
||||
let mut entries = Vec::new();
|
||||
let mut total_data: u64 = 0;
|
||||
|
||||
// Read directory entries
|
||||
let dir_entries: Vec<_> = match std::fs::read_dir(current) {
|
||||
@@ -317,42 +327,69 @@ fn collect_entries(base: &Path, current: &Path, pb: &ProgressBar) -> Result<Vec<
|
||||
continue; // Skip other types (sockets, fifos, etc.)
|
||||
};
|
||||
|
||||
// Get file content or symlink target
|
||||
let data: Vec<u8> = if file_type.is_file() {
|
||||
match std::fs::read(&path) {
|
||||
// Build the entry name (relative path from base)
|
||||
let relative = path.strip_prefix(base).unwrap();
|
||||
let entry_name = relative.to_string_lossy().into_owned();
|
||||
|
||||
// Handle hard links: only store data for first occurrence
|
||||
let (data, nlink) = if file_type.is_file() && metadata.nlink() > 1 {
|
||||
// This file has multiple hard links - check if we've seen it before
|
||||
let inode_key = (metadata.dev(), metadata.ino());
|
||||
|
||||
if let Some(_first_path) = seen_inodes.get(&inode_key) {
|
||||
// We've seen this inode before - create a hard link entry with no data
|
||||
(Vec::new(), metadata.nlink() as u32)
|
||||
} else {
|
||||
// First occurrence - read the data and record this inode
|
||||
seen_inodes.insert(inode_key, entry_name.clone());
|
||||
let data = match std::fs::read(&path) {
|
||||
Ok(data) => data,
|
||||
Err(e) => {
|
||||
veprintln!("Warning: cannot read file {}: {}", path.display(), e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
(data, metadata.nlink() as u32)
|
||||
}
|
||||
} else if file_type.is_file() {
|
||||
// Regular file with nlink=1
|
||||
let data = match std::fs::read(&path) {
|
||||
Ok(data) => data,
|
||||
Err(e) => {
|
||||
veprintln!("Warning: cannot read file {}: {}", path.display(), e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
(data, 1)
|
||||
} else if file_type.is_symlink() {
|
||||
match std::fs::read_link(&path) {
|
||||
Ok(target) => target.to_string_lossy().into_owned().into_bytes(),
|
||||
Ok(target) => (target.to_string_lossy().into_owned().into_bytes(), 1),
|
||||
Err(e) => {
|
||||
veprintln!("Warning: cannot read symlink {}: {}", path.display(), e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Vec::new()
|
||||
// Directory
|
||||
(Vec::new(), 2)
|
||||
};
|
||||
|
||||
// Build the entry name (relative path from base)
|
||||
let relative = path.strip_prefix(base).unwrap();
|
||||
let entry_name = relative.to_string_lossy().into_owned();
|
||||
|
||||
// nlink: directories have 2 (. and ..), files/symlinks have 1
|
||||
let nlink = if file_type.is_dir() { 2 } else { 1 };
|
||||
|
||||
total_data += data.len() as u64;
|
||||
entries.push((entry_name, mode, metadata.mtime() as u32, nlink, data));
|
||||
|
||||
// Recurse into directories
|
||||
if file_type.is_dir() {
|
||||
let mut sub_entries = collect_entries(base, &path, pb)?;
|
||||
let mut sub_entries = collect_entries(base, &path, pb, seen_inodes)?;
|
||||
for (_, _, _, _, d) in &sub_entries {
|
||||
total_data += d.len() as u64;
|
||||
}
|
||||
entries.append(&mut sub_entries);
|
||||
}
|
||||
}
|
||||
|
||||
// Only print summary for the root directory
|
||||
if current == base {
|
||||
veprintln!("Collected {} entries, {} bytes total data", entries.len(), total_data);
|
||||
}
|
||||
Ok(entries)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user