diff --git a/CHANGELOG.md b/CHANGELOG.md
index 697e71b..6c30095 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,8 @@ Released YYYY-MM-DD.
 
 ### Added
 
-* TODO (or remove section if none)
+* Bindings to `LLVMFuzzerCustomCrossOver` through the `fuzz_crossover` macro.
+* `example_crossover` using both `fuzz_mutator` and `fuzz_crossover` (adapted from @rigtorp)
 
 ### Changed
 
diff --git a/Cargo.toml b/Cargo.toml
index 9aaf813..bdcd0d9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,6 +24,7 @@ arbitrary-derive = ["arbitrary/derive"]
 members = [
   "./example/fuzz",
   "./example_arbitrary/fuzz",
+  "./example_crossover/fuzz",
   "./example_mutator/fuzz",
 ]
 
diff --git a/ci/script.sh b/ci/script.sh
index 59462d4..3eb2d17 100755
--- a/ci/script.sh
+++ b/ci/script.sh
@@ -30,4 +30,10 @@ cargo fuzz build --dev
 (! cargo fuzz run boom -- -runs=10000000)
 popd
 
+pushd ./example_crossover
+cargo fuzz build
+cargo fuzz build --dev
+(! cargo fuzz run --release boom -- -runs=10000000)
+popd
+
 echo "All good!"
diff --git a/example_crossover/Cargo.toml b/example_crossover/Cargo.toml
new file mode 100644
index 0000000..441b5e3
--- /dev/null
+++ b/example_crossover/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "example_crossover"
+version = "0.1.0"
+authors = ["R. Elliott Childre"]
+edition = "2021"
+
+[target.'cfg(fuzzing)'.dependencies]
+rand = "0.8"
diff --git a/example_crossover/README.md b/example_crossover/README.md
new file mode 100644
index 0000000..cd7caae
--- /dev/null
+++ b/example_crossover/README.md
@@ -0,0 +1,40 @@
+# A Custom Crossover Example
+
+## Overview
+
+This example is a reimplementation of [Erik Rigtorp's floating point summation fuzzing example][1]
+in the Rust bindings for LibFuzzer, provided by this crate. In this particular example, Erik uses
+both a custom mutator, and a custom crossover function, which provides a well-documented, complex
+code example.
+
+## Implementation
+
+This is mostly a one-to-one rewrite of the C++ code in the blog post, with the big difference
+being the method of converting the raw bytes that are exposed to the custom functions into the
+decoded double-precision floating-point values. Where in C++ we can simply do:
+
+```c++
+uint8_t *Data = ...;
+size_t Size = ...;
+double *begin = (double *)Data;
+double *end = (double *)Data + Size / sizeof(double);
+```
+
+In Rust, however, the task seems a bit more complex due to strictness on alignment:
+
+* [Rust, how to slice into a byte array as if it were a float array? - Stack Overflow][2]
+* [Re-interpret slice of bytes (e.g. [u8]) as slice of [f32] - help - The Rust Programming Language Forum][3]
+* [How to transmute a u8 buffer to struct in Rust? - Stack Overflow][4]
+
+So the casts of `Data` in the blog post's C++ are now `slice::align_to{_mut}` calls.
+
+## Example Crashing Testcase
+
+The fuzz harness typically discovers a non-optimized crashing testcase in very little time; however,
+the artifact file checked into the example is the minimal crashing 16 bytes of `[-inf, inf]`.
+These two values add to produce a `NaN` value just as Erik showed that the C++ runtime produced.
+
+[1]: https://rigtorp.se/fuzzing-floating-point-code/
+[2]: https://stackoverflow.com/a/73174764
+[3]: https://users.rust-lang.org/t/re-interpret-slice-of-bytes-e-g-u8-as-slice-of-f32/34551
+[4]: https://stackoverflow.com/a/59292352
diff --git a/example_crossover/fuzz/Cargo.toml b/example_crossover/fuzz/Cargo.toml
new file mode 100644
index 0000000..31b8783
--- /dev/null
+++ b/example_crossover/fuzz/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "example_crossover_fuzz"
+version = "0.1.0"
+authors = ["R. Elliott Childre"]
+edition = "2021"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+rand = "0.8"
+libfuzzer-sys = { path = "../.." }
+example_crossover = { path = ".." }
+
+[[bin]]
+name = "boom"
+path = "fuzz_targets/boom.rs"
diff --git a/example_crossover/fuzz/artifacts/boom/crash-6f37ac2111469380f475f6c307a915f5917fed6b b/example_crossover/fuzz/artifacts/boom/crash-6f37ac2111469380f475f6c307a915f5917fed6b
new file mode 100644
index 0000000..a86bb86
Binary files /dev/null and b/example_crossover/fuzz/artifacts/boom/crash-6f37ac2111469380f475f6c307a915f5917fed6b differ
diff --git a/example_crossover/fuzz/fuzz_targets/boom.rs b/example_crossover/fuzz/fuzz_targets/boom.rs
new file mode 100755
index 0000000..f34bfbf
--- /dev/null
+++ b/example_crossover/fuzz/fuzz_targets/boom.rs
@@ -0,0 +1,138 @@
+#![no_main]
+
+use example_crossover::sum;
+use libfuzzer_sys::{fuzz_crossover, fuzz_mutator, fuzz_target};
+use rand::distributions::{Bernoulli, Distribution, Uniform};
+use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng};
+use std::mem::size_of;
+
+fuzz_target!(|data: &[u8]| {
+    // SAFETY: every bit pattern is a valid `f64`, and `align_to` only yields
+    // the correctly aligned middle part of the byte slice.
+    let (_, floats, _) = unsafe { data.align_to::<f64>() };
+
+    let res = sum(floats);
+
+    assert!(
+        !res.is_nan(),
+        "The sum of the following f64's resulted in a NaN: {floats:?}"
+    );
+});
+
+/// Draw a "random floating point" value, heavily biased towards the special
+/// values (NaN, infinities, extremes, epsilon, zero) that stress numeric code.
+///
+/// Note that `f64::MIN` is the most negative finite value (i.e. `-f64::MAX`),
+/// so the smallest positive normal value -- the counterpart of C++'s
+/// `numeric_limits<double>::min()` -- is spelled `f64::MIN_POSITIVE` here.
+fn rfp(rng: &mut StdRng) -> f64 {
+    match Uniform::new_inclusive(0, 10).sample(rng) {
+        0 => f64::NAN,
+        1 => f64::MIN_POSITIVE,
+        2 => f64::MAX,
+        3 => -f64::MIN_POSITIVE,
+        4 => -f64::MAX,
+        5 => f64::EPSILON,
+        6 => -f64::EPSILON,
+        7 => f64::INFINITY,
+        8 => f64::NEG_INFINITY,
+        9 => 0.0,
+        10 => Uniform::new_inclusive(-1.0, 1.0).sample(rng),
+        _ => unreachable!(),
+    }
+}
+
+fuzz_mutator!(|data: &mut [u8], size: usize, max_size: usize, seed: u32| {
+    let mut gen = StdRng::seed_from_u64(seed.into());
+
+    match Uniform::new_inclusive(0, 3).sample(&mut gen) {
+        0 => {
+            // "Change [an] element"
+
+            // Not altering the size, so decode the intended space (i.e. `size`) as floats
+            let (_, floats, _) = unsafe { data[..size].align_to_mut::<f64>() };
+
+            if !floats.is_empty() {
+                let d = Uniform::new(0, floats.len());
+                floats[d.sample(&mut gen)] = rfp(&mut gen);
+            }
+        }
+        1 => {
+            // "Add [an] element [to the end]"
+            let plus_one = size + size_of::<f64>();
+            if plus_one <= max_size {
+                // Adding 1, f64 to the size, so decode the intended space (i.e.
+                // `size`) plus one more (since we just checked it will fit) as floats
+                let (_, floats, _) = unsafe { data[..plus_one].align_to_mut::<f64>() };
+
+                // An unaligned buffer can leave no whole float even after
+                // growing; fall through to "no change" rather than panic.
+                if let Some(last) = floats.last_mut() {
+                    *last = rfp(&mut gen);
+                    return plus_one;
+                }
+            }
+        }
+        2 => {
+            // "Delete [the end] element"
+
+            // Attempting to shrink the size by 1, f64, so decode the intended
+            // space (i.e. `size`) as floats and see if we have any
+            let (_, floats, _) = unsafe { data[..size].align_to::<f64>() };
+
+            if !floats.is_empty() {
+                return size - size_of::<f64>();
+            }
+        }
+        3 => {
+            // "Shuffle [the] elements"
+
+            // Not altering the size, so decode the intended space (i.e. `size`) as floats
+            let (_, floats, _) = unsafe { data[..size].align_to_mut::<f64>() };
+            floats.shuffle(&mut gen);
+        }
+        _ => unreachable!(),
+    };
+
+    size
+});
+
+fuzz_crossover!(|data1: &[u8], data2: &[u8], out: &mut [u8], seed: u32| {
+    let mut gen = StdRng::seed_from_u64(seed.into());
+
+    let bd = Bernoulli::new(0.5).unwrap();
+
+    // Decode each source to see how many floats we can pull with proper
+    // alignment, and destination as to how many will fit with proper alignment
+    //
+    // Keep track of the unaligned prefix to `out`, as we will need to remember
+    // that those bytes will remain prepended to the actual floats that we
+    // write into the out buffer.
+    let (out_pref, out_floats, _) = unsafe { out.align_to_mut::<f64>() };
+    let (_, d1_floats, _) = unsafe { data1.align_to::<f64>() };
+    let (_, d2_floats, _) = unsafe { data2.align_to::<f64>() };
+
+    // Given that the sources and destinations may have drastically fewer
+    // available aligned floats than decoding allows for; see which has the
+    // smallest number.
+    let n = *[out_floats.len(), d1_floats.len(), d2_floats.len()]
+        .iter()
+        .min()
+        .unwrap();
+
+    // Put into the destination, floats from either data1 or data2 if the
+    // Bernoulli distribution succeeds or fails
+    for i in 0..n {
+        out_floats[i] = if bd.sample(&mut gen) {
+            d1_floats[i]
+        } else {
+            d2_floats[i]
+        };
+    }
+
+    // Now that we have written the true floats, report back to the fuzzing
+    // engine that we left the unaligned `out` prefix bytes at the beginning of
+    // `out` and also then the floats that we wrote into the aligned float
+    // section.
+    out_pref.len() * size_of::<u8>() + n * size_of::<f64>()
+});
diff --git a/example_crossover/src/lib.rs b/example_crossover/src/lib.rs
new file mode 100644
index 0000000..550d49f
--- /dev/null
+++ b/example_crossover/src/lib.rs
@@ -0,0 +1,9 @@
+/// Sum `floats`, skipping any element that is itself NaN.
+///
+/// Deliberately naive: NaN inputs are filtered, but a NaN can still arise
+/// mid-sum (e.g. `inf + -inf`), which is what the fuzz target hunts for.
+pub fn sum(floats: &[f64]) -> f64 {
+    floats
+        .iter()
+        .fold(0.0, |a, b| if b.is_nan() { a } else { a + b })
+}
diff --git a/src/lib.rs b/src/lib.rs
index ddf0d35..1cd12ee 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -538,3 +538,151 @@ pub fn fuzzer_mutate(data: &mut [u8], size: usize, max_size: usize) -> usize {
     assert!(new_size <= data.len());
     new_size
 }
+
+/// Define a custom cross-over function to combine test cases.
+///
+/// This is optional, and libFuzzer will use its own, default cross-over strategy
+/// if this is not provided. (As of the time of writing, this default strategy
+/// takes alternating byte sequences from the two test cases, to construct the
+/// new one) (see `FuzzerCrossOver.cpp`)
+///
+/// This could potentially be useful if your input is, for instance, a
+/// sequence of fixed-size, multi-byte values and the crossover could then
+/// merge discrete values rather than joining parts of a value.
+///
+/// ## Implementation Contract
+///
+/// The original, read-only inputs are given in the full slices of `data1`, and
+/// `data2` (as opposed to the, potentially, partial slice of `data` in
+/// [the `fuzz_mutator!` macro][crate::fuzz_mutator]).
+///
+/// You must place the new input merged from the two existing inputs' data
+/// into `out` and return the size of the relevant data written to that slice.
+///
+/// The deterministic requirements from [the `fuzz_mutator!` macro][crate::fuzz_mutator]
+/// apply as well to the `seed` parameter.
+///
+/// ## Example: Floating-Point Sum NaN
+///
+/// ```no_run
+/// #![no_main]
+///
+/// use libfuzzer_sys::{fuzz_crossover, fuzz_mutator, fuzz_target, fuzzer_mutate};
+/// use rand::{rngs::StdRng, Rng, SeedableRng};
+/// use std::mem::size_of;
+///
+/// fuzz_target!(|data: &[u8]| {
+///     let (_, floats, _) = unsafe { data.align_to::<f64>() };
+///
+///     let res = floats
+///         .iter()
+///         .fold(0.0, |a, b| if b.is_nan() { a } else { a + b });
+///
+///     assert!(
+///         !res.is_nan(),
+///         "The sum of the following floats resulted in a NaN: {floats:?}"
+///     );
+/// });
+///
+/// // Inject some ...potentially problematic values to make the example close
+/// // more quickly.
+/// fuzz_mutator!(|data: &mut [u8], size: usize, max_size: usize, seed: u32| {
+///     let mut gen = StdRng::seed_from_u64(seed.into());
+///
+///     let (_, floats, _) = unsafe { data[..size].align_to_mut::<f64>() };
+///
+///     let x = gen.gen_range(0..=1000);
+///     if x == 0 && !floats.is_empty() {
+///         floats[0] = f64::INFINITY;
+///     } else if x == 1000 && floats.len() > 1 {
+///         floats[1] = f64::NEG_INFINITY;
+///     } else {
+///         return fuzzer_mutate(data, size, max_size);
+///     }
+///
+///     size
+/// });
+///
+/// fuzz_crossover!(|data1: &[u8], data2: &[u8], out: &mut [u8], _seed: u32| {
+///     // Decode each source to see how many floats we can pull with proper
+///     // alignment, and destination as to how many will fit with proper alignment
+///     //
+///     // Keep track of the unaligned prefix to `out`, as we will need to remember
+///     // that those bytes will remain prepended to the actual floats that we
+///     // write into the out buffer.
+///     let (out_pref, out_floats, _) = unsafe { out.align_to_mut::<f64>() };
+///     let (_, d1_floats, _) = unsafe { data1.align_to::<f64>() };
+///     let (_, d2_floats, _) = unsafe { data2.align_to::<f64>() };
+///
+///     // Put into the destination, floats first from data1 then from data2, ...if
+///     // possible given the size of `out`
+///     let mut i: usize = 0;
+///     for float in d1_floats.iter().chain(d2_floats).take(out_floats.len()) {
+///         out_floats[i] = *float;
+///         i += 1;
+///     }
+///
+///     // Now that we have written the true floats, report back to the fuzzing
+///     // engine that we left the unaligned `out` prefix bytes at the beginning of
+///     // `out` and also then the floats that we wrote into the aligned float
+///     // section.
+///     out_pref.len() * size_of::<u8>() + i * size_of::<f64>()
+/// });
+/// ```
+///
+/// This example is a minimized version of [Erik Rigtorp's floating point summation fuzzing example][1].
+/// A more detailed version of this experiment can be found in the
+/// `example_crossover` directory.
+///
+/// [1]: https://rigtorp.se/fuzzing-floating-point-code/
+#[macro_export]
+macro_rules! fuzz_crossover {
+    (
+        |
+        $data1:ident : &[u8] ,
+        $data2:ident : &[u8] ,
+        $out:ident : &mut [u8] ,
+        $seed:ident : u32 $(,)*
+        |
+        $body:block
+    ) => {
+        /// Auto-generated function. Do not use; only for LibFuzzer's
+        /// consumption.
+        #[export_name = "LLVMFuzzerCustomCrossOver"]
+        #[doc(hidden)]
+        pub unsafe extern "C" fn rust_fuzzer_custom_crossover(
+            $data1: *const u8,
+            size1: usize,
+            $data2: *const u8,
+            size2: usize,
+            $out: *mut u8,
+            max_out_size: usize,
+            $seed: std::os::raw::c_uint,
+        ) -> usize {
+            // SAFETY: relies on libFuzzer's calling contract -- each pointer
+            // must be valid for the length passed alongside it, and `out`
+            // must not overlap either input.
+            let $data1: &[u8] = std::slice::from_raw_parts($data1, size1);
+            let $data2: &[u8] = std::slice::from_raw_parts($data2, size2);
+            let $out: &mut [u8] = std::slice::from_raw_parts_mut($out, max_out_size);
+
+            // `unsigned int` is generally a `u32`, but not on all targets. Do
+            // an infallible (and potentially lossy, but that's okay because it
+            // preserves determinism) conversion.
+            let $seed = $seed as u32;
+
+            // Define and invoke a new, safe function so that the body doesn't
+            // inherit `unsafe`.
+            fn custom_crossover(
+                $data1: &[u8],
+                $data2: &[u8],
+                $out: &mut [u8],
+                $seed: u32,
+            ) -> usize {
+                $body
+            }
+
+            custom_crossover($data1, $data2, $out, $seed)
+        }
+    };
+}