Skip to content

Commit

Permalink
Add loop contracts and harness for run_utf8_validation
Browse files Browse the repository at this point in the history
  • Loading branch information
qinheping committed Nov 8, 2024
1 parent 25ad12b commit 4d1a924
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 2 deletions.
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@
#![feature(unboxed_closures)]
#![feature(unsized_fn_params)]
#![feature(with_negative_coherence)]
#![feature(proc_macro_hygiene)]
// tidy-alphabetical-end
//
// Target features:
Expand Down
31 changes: 31 additions & 0 deletions library/core/src/str/validations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
use super::Utf8Error;
use crate::mem;

#[cfg(kani)]
use crate::kani;

/// Returns the initial codepoint accumulator for the first byte.
/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
/// for width 3, and 3 bits for width 4.
Expand Down Expand Up @@ -132,6 +135,7 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 };
let align = v.as_ptr().align_offset(usize_bytes);

#[safety::loop_invariant(index <= len + ascii_block_size)]
while index < len {
let old_offset = index;
macro_rules! err {
Expand Down Expand Up @@ -211,6 +215,7 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
// until we find a word containing a non-ascii byte.
if align != usize::MAX && align.wrapping_sub(index) % usize_bytes == 0 {
let ptr = v.as_ptr();
#[safety::loop_invariant(index <= blocks_end + ascii_block_size && align.wrapping_sub(index) % usize_bytes == 0)]
while index < blocks_end {
// SAFETY: since `align - index` and `ascii_block_size` are
// multiples of `usize_bytes`, `block = ptr.add(index)` is
Expand All @@ -228,6 +233,7 @@ pub(super) const fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
index += ascii_block_size;
}
// step from the point where the wordwise loop stopped
#[safety::loop_invariant(index <= len)]
while index < len && v[index] < 128 {
index += 1;
}
Expand Down Expand Up @@ -271,3 +277,28 @@ pub const fn utf8_char_width(b: u8) -> usize {

/// Mask of the value bits of a continuation byte.
const CONT_MASK: u8 = 0b0011_1111;

#[cfg(kani)]
#[unstable(feature = "kani", issue = "none")]
pub mod verify {
use super::*;

#[kani::proof]
#[kani::unwind(8)]
pub fn check_run_utf8_validation() {
if kani::any() {
// TODO: ARR_SIZE can be `std::usize::MAX` with cbmc argument
// `--arrays-uf-always`
const ARR_SIZE: usize = 1000;
let mut x: [u8; ARR_SIZE] = kani::any();
let mut xs = kani::slice::any_slice_of_array_mut(&mut x);
run_utf8_validation(xs);
} else {
let ptr = kani::any_where::<usize, _>(|val| *val != 0) as *const u8;
kani::assume(ptr.is_aligned());
unsafe{
run_utf8_validation(crate::slice::from_raw_parts(ptr, 0));
}
}
}
}
2 changes: 1 addition & 1 deletion scripts/run-kani.sh
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ main() {

echo "Running Kani verify-std command..."

"$kani_path" verify-std -Z unstable-options ./library --target-dir "$temp_dir_target" -Z function-contracts -Z mem-predicates --output-format=terse $command_args
"$kani_path" verify-std -Z unstable-options ./library --target-dir "$temp_dir_target" -Z function-contracts -Z mem-predicates -Z loop-contracts --output-format=terse $command_args --enable-unstable --cbmc-args --object-bits 12
}

main
Expand Down
2 changes: 1 addition & 1 deletion tool_config/kani-version.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
# incompatible with the verify-std repo.

[kani]
commit = "2565ef65767a696f1d519b42621b4e502e8970d0"
commit = "8400296f5280be4f99820129bc66447e8dff63f4"

0 comments on commit 4d1a924

Please sign in to comment.