From 4d1a92466d45343b9dd3bbee76e8b1517ac7afcb Mon Sep 17 00:00:00 2001 From: Qinheping Hu Date: Fri, 8 Nov 2024 12:13:45 -0600 Subject: [PATCH] Add loop contracts and harness for run_utf8_validation --- library/core/src/lib.rs | 1 + library/core/src/str/validations.rs | 31 +++++++++++++++++++++++++++++ scripts/run-kani.sh | 2 +- tool_config/kani-version.toml | 2 +- 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index 280fda93a5494..7c0596768b784 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -230,6 +230,7 @@ #![feature(unboxed_closures)] #![feature(unsized_fn_params)] #![feature(with_negative_coherence)] +#![feature(proc_macro_hygiene)] // tidy-alphabetical-end // // Target features: diff --git a/library/core/src/str/validations.rs b/library/core/src/str/validations.rs index cca8ff74dda8b..e9e5d5dbee985 100644 --- a/library/core/src/str/validations.rs +++ b/library/core/src/str/validations.rs @@ -3,6 +3,9 @@ use super::Utf8Error; use crate::mem; +#[cfg(kani)] +use crate::kani; + /// Returns the initial codepoint accumulator for the first byte. /// The first byte is special, only want bottom 5 bits for width 2, 4 bits /// for width 3, and 3 bits for width 4. @@ -132,6 +135,7 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> { let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 }; let align = v.as_ptr().align_offset(usize_bytes); + #[safety::loop_invariant(index <= len + ascii_block_size)] while index < len { let old_offset = index; macro_rules! err { @@ -211,6 +215,7 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> { // until we find a word containing a non-ascii byte. if align != usize::MAX && align.wrapping_sub(index) % usize_bytes == 0 { let ptr = v.as_ptr(); + #[safety::loop_invariant(index <= blocks_end + ascii_block_size && align.wrapping_sub(index) % usize_bytes == 0)] while index < blocks_end { // SAFETY: since `align - index` and `ascii_block_size` are // multiples of `usize_bytes`, `block = ptr.add(index)` is @@ -228,6 +233,7 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> { index += ascii_block_size; } // step from the point where the wordwise loop stopped + #[safety::loop_invariant(index <= len)] while index < len && v[index] < 128 { index += 1; } @@ -271,3 +277,28 @@ pub const fn utf8_char_width(b: u8) -> usize { /// Mask of the value bits of a continuation byte. const CONT_MASK: u8 = 0b0011_1111; + +#[cfg(kani)] +#[unstable(feature = "kani", issue = "none")] +pub mod verify { + use super::*; + + #[kani::proof] + #[kani::unwind(8)] + pub fn check_run_utf8_validation() { + if kani::any() { + // TODO: ARR_SIZE can be `std::usize::MAX` with cbmc argument + // `--arrays-uf-always` + const ARR_SIZE: usize = 1000; + let mut x: [u8; ARR_SIZE] = kani::any(); + let mut xs = kani::slice::any_slice_of_array_mut(&mut x); + run_utf8_validation(xs); + } else { + let ptr = kani::any_where::(|val| *val != 0) as *const u8; + kani::assume(ptr.is_aligned()); + unsafe{ + run_utf8_validation(crate::slice::from_raw_parts(ptr, 0)); + } + } + } +} diff --git a/scripts/run-kani.sh b/scripts/run-kani.sh index 8ce27dac5d207..1c62a05969e34 100755 --- a/scripts/run-kani.sh +++ b/scripts/run-kani.sh @@ -183,7 +183,7 @@ main() { echo "Running Kani verify-std command..." - "$kani_path" verify-std -Z unstable-options ./library --target-dir "$temp_dir_target" -Z function-contracts -Z mem-predicates --output-format=terse $command_args + "$kani_path" verify-std -Z unstable-options ./library --target-dir "$temp_dir_target" -Z function-contracts -Z mem-predicates -Z loop-contracts --output-format=terse $command_args --enable-unstable --cbmc-args --object-bits 12 } main diff --git a/tool_config/kani-version.toml b/tool_config/kani-version.toml index f75f2058e67f2..c28d156262481 100644 --- a/tool_config/kani-version.toml +++ b/tool_config/kani-version.toml @@ -2,4 +2,4 @@ # incompatible with the verify-std repo. [kani] -commit = "2565ef65767a696f1d519b42621b4e502e8970d0" +commit = "8400296f5280be4f99820129bc66447e8dff63f4"