Skip to content

Commit

Permalink
improve guesses on decompressed filesizes
Browse files Browse the repository at this point in the history
  • Loading branch information
djugei committed Feb 11, 2025
1 parent 34435f3 commit b29fc68
Showing 1 changed file with 54 additions and 10 deletions.
64 changes: 54 additions & 10 deletions client/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,19 +176,21 @@ pub(crate) fn find_deltaupgrade_candidates(
None
}
}) {
// try to find the decompressed size for better progress monitoring
// Try to find the decompressed size for better progress monitoring
let oldfile = std::fs::File::open(oldpath).expect("io error on local disk");
// safety: i promise to not open the same file as writable at the same time
// Safety: I promise to not open the same file as writable at the same time
let oldfile = unsafe { Mmap::map(&oldfile).expect("mmap failed") };
// 16 megabytes seems like an ok average size
// todo: find the actual average size of a decompressed package
let default_size = 16 * 1024 * 1024;
// due to pacman packages being compressed in streaming mode
// Testing reveals the average size to be 17.2 MB
let default_size = 17 * 1024 * 1024;
// Due to pacman packages being compressed in streaming mode
// zstd does not have an exact decompressed size and heavily overestimates
let dec_size = zstd::bulk::Decompressor::upper_bound(&oldfile).unwrap_or_else(|| {
debug!("using default size for {name}");
default_size
});
let dec_size = zstd::bulk::Decompressor::upper_bound(&oldfile)
// The real ratio is 18/47.4, 4/11 is close enough
.map(|s| (s as u64) * 4 / 11)
.unwrap_or_else(|| {
debug!("using default size for {name}");
default_size
});
Ok((line, pkg, oldpkg, oldfile, (dec_size as u64)))
} else {
info!("no cached package found, leaving {} for pacman", filename);
Expand Down Expand Up @@ -487,3 +489,45 @@ pub(crate) fn calc_stats(count: usize) -> std::io::Result<()> {

Ok(())
}

/// Calibration helper: walks every `.tar.zst` file in `PACMAN_CACHE`, fully
/// decompresses it, and prints the totals needed to tune the size guess:
/// the real decompressed size, the sum of zstd's upper-bound estimates, the
/// average decompressed package size, and the compressed/decompressed ratio.
///
/// Packages that fail to decompress are reported on stderr and left out of
/// every total, so a single damaged cache entry cannot skew the averages.
#[test]
fn test_dec_sizes() {
    use std::io::Seek;
    use std::os::unix::ffi::OsStrExt;
    use std::os::unix::fs::MetadataExt;

    let mut actual_size = 0u64;
    let mut guessed_size = 0u64;
    let mut count = 0u64;
    let mut compressed = 0u64;

    for entry in std::fs::read_dir(PACMAN_CACHE).expect("cannot list the pacman cache") {
        let entry = entry.expect("io error while listing the pacman cache");
        if !entry.file_type().expect("cannot stat cache entry").is_file() {
            continue;
        }
        if !entry.file_name().as_bytes().ends_with(b".tar.zst") {
            continue;
        }

        let compressed_len = entry.metadata().expect("cannot stat cache entry").size();
        let mut file = std::fs::File::open(entry.path()).expect("io error on local disk");
        // SAFETY: the file is opened read-only and this test never writes to
        // it while the mapping is alive.
        let mmapfile = unsafe { Mmap::map(&file).expect("mmap failed") };
        let guess = zstd::bulk::Decompressor::upper_bound(&mmapfile)
            .expect("zstd could not bound the decompressed size") as u64;

        // Make sure the decoder starts at the first byte of the file.
        file.rewind().expect("cannot rewind package file");
        let mut dec = zstd::Decoder::new(file).expect("cannot create zstd decoder");
        // Stream the whole archive into a sink; only the byte count matters.
        // Unlike a manual `while let Ok(..) = read(..)` loop this surfaces
        // decode errors instead of silently counting a partial size.
        let decompressed = match std::io::copy(&mut dec, &mut std::io::sink()) {
            Ok(bytes) => bytes,
            Err(err) => {
                eprintln!("skipping {}: {err}", entry.path().display());
                continue;
            }
        };

        // Only account for packages that were measured completely.
        compressed += compressed_len;
        guessed_size += guess;
        actual_size += decompressed;
        count += 1;
    }

    // Avoid a division-by-zero panic when the cache holds no usable package.
    if count == 0 {
        println!("no .tar.zst packages found in the pacman cache, nothing to measure");
        return;
    }

    println!("real size is {}", ByteSize::b(actual_size));
    println!("guess is {}", ByteSize::b(guessed_size));
    println!("average package size is {}", ByteSize::b(actual_size / count));
    println!(
        "average compression ratio: {}",
        (compressed as f64) / (actual_size as f64)
    );
}

0 comments on commit b29fc68

Please sign in to comment.