Skip to content

Commit

Permalink
implement to/from hex and to/from bin with sign
Browse files Browse the repository at this point in the history
  • Loading branch information
ekiwi committed Oct 24, 2024
1 parent a78f016 commit e76bd01
Show file tree
Hide file tree
Showing 6 changed files with 242 additions and 45 deletions.
14 changes: 14 additions & 0 deletions proptest-regressions/bv/arithmetic.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc d8a2060964925b8db577e1473bdf9bedcd9e31d5b19f0404ddb8d174620a751e # shrinks to a = "10"
cc 44630f29efa8b6c62293a52fe601e58198a897a77ea6b1db70e568916d1f1ceb # shrinks to s = "10", by = 0
cc 08799c0deb3bcaa4c9448cbda9feb83b9621c2ad11455bbbe4c24fb6b547d8fe # shrinks to s = "10", by = 0
cc 5ed138da5c5c0a836870c701076ad0f812e3cbbb01dcc2899bf6a04a61641976 # shrinks to a = "0", b = "0"
cc a24990b3f9c8aebaa9ffae937d99116b2faa431d9f0e9270cc49ddf114088f18 # shrinks to (s, by) = ("00", 2)
cc e39d7e5e9be91ead6334200699a5d76f84d0886cda8cab326478a41378a732f2 # shrinks to (s, by) = ("00", 2)
cc 1a2e90bbd8408cc05207a971562051c72a77f58d30b70fd1ff095e595eaca284 # shrinks to (s, by) = ("00", 2)
cc f6a72ed519966d03d42decd8e0d0ba48fee677d064ea3d70516905a0e76aff78 # shrinks to (s, msb, lsb) = ("11", 0, 0)
7 changes: 7 additions & 0 deletions proptest-regressions/bv/borrowed.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 7938648cf311575fe4c076ae8e41bbf9f31eec421cfe3213ea3c22e4c380f422 # shrinks to a = "10"
11 changes: 11 additions & 0 deletions proptest-regressions/bv/io/strings.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 3599925df338dffa884751df1675b848900a8b36d7bd271430ca87f0c3226131 # shrinks to s = "10"
cc 053f7e917a0b8fe3b01eb06eb4da695561ec6d38eca61e08aff9b42a6d9e9097 # shrinks to s = "+"
cc 061b20145dddba3e28e3602926dc68debb176b42afeb60a2f0682cb58c4a305e # shrinks to s = "-0"
cc 635bb11fb0658be232767b6fe15249bc1cb4c44a845198562059476c99e1189d # shrinks to s = "+Aaaa0AA0aaaA0aAA00A0aAaaaa00aA00"
cc 5fa4e65fe3869a2e2dd3e85963a540e5b5c3067f158bb69fb1eb334710b91f6d # shrinks to s = "-aAaa00aAaaAAaAa0"
1 change: 1 addition & 0 deletions src/bv/borrowed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ mod tests {
fn borrowed_hash() {
check_hash(BitVecValue::zero(1));
check_hash(BitVecValue::zero(1000000));
check_hash(BitVecValue::from_bit_str("11").unwrap());
}

fn bit_str_arg() -> impl Strategy<Value = String> {
Expand Down
243 changes: 198 additions & 45 deletions src/bv/io/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// released under BSD 3-Clause License
// author: Kevin Laeufer <[email protected]>

use crate::bv::arithmetic::{is_neg, negate_in_place};
use crate::{WidthInt, Word};

#[derive(Debug, Clone, PartialEq, Eq)]
Expand All @@ -17,12 +18,29 @@ pub enum IntErrorKind {
ExceedsWidth,
}

/// Interprets the bits as a two's complement integer.
pub(crate) fn to_bit_str_signed(values: &[Word], width: WidthInt) -> String {
if is_neg(values, width) {
let mut copy = Vec::from(values);
negate_in_place(&mut copy, width);
let mut out = String::with_capacity(width as usize + 1);
out.push('-');
to_bit_str_internal(&copy, width - 1, out)
} else {
to_bit_str(values, width - 1)
}
}

/// Formats `values` as a binary string of `width` bits.
///
/// Delegates to `to_bit_str_internal`, handing it a buffer with room for `width` characters.
pub(crate) fn to_bit_str(values: &[Word], width: WidthInt) -> String {
    to_bit_str_internal(values, width, String::with_capacity(width as usize))
}

fn to_bit_str_internal(values: &[Word], width: WidthInt, mut out: String) -> String {
if width == 0 {
return "".to_string();
}
let start_bit = (width - 1) % Word::BITS;
let mut out = String::with_capacity(width as usize);
let msb = values.last().unwrap();
for ii in (0..(start_bit + 1)).rev() {
let value = (msb >> ii) & 1;
Expand Down Expand Up @@ -50,13 +68,39 @@ const BITS_PER_HEX_DIGIT: u32 = 4;
const WORD_HEX_DIGITS: u32 = Word::BITS / BITS_PER_HEX_DIGIT;
const WORD_HEX_MASK: Word = ((1 as Word) << BITS_PER_HEX_DIGIT) - 1;

/// Interprets the bits as a two's complement integer.
pub(crate) fn to_hex_str_signed(values: &[Word], width: WidthInt) -> String {
if is_neg(values, width) {
let mut copy = Vec::from(values);
negate_in_place(&mut copy, width);
let mut out = String::with_capacity(width as usize + 1);
out.push('-');
to_hex_str_internal(select_words_for_width(&copy, width - 1), width - 1, out)
} else {
to_hex_str(select_words_for_width(values, width - 1), width - 1)
}
}

/// Returns the leading words of `words` that are required to hold `width` bits.
///
/// # Panics
/// Panics if `words` contains fewer words than `width` bits require.
#[inline]
fn select_words_for_width(words: &[Word], width: WidthInt) -> &[Word] {
    let count = width.div_ceil(Word::BITS) as usize;
    debug_assert!(count <= words.len(), "not enough words!");
    &words[..count]
}

/// Formats `values` as a hexadecimal string covering `width` bits.
///
/// Delegates to `to_hex_str_internal`, handing it a buffer with room for one character per
/// hex digit.
pub(crate) fn to_hex_str(values: &[Word], width: WidthInt) -> String {
    let num_digits = width.div_ceil(BITS_PER_HEX_DIGIT) as usize;
    to_hex_str_internal(values, width, String::with_capacity(num_digits))
}

fn to_hex_str_internal(values: &[Word], width: WidthInt, mut out: String) -> String {
debug_assert_eq!(width.div_ceil(Word::BITS) as usize, values.len());
if width == 0 {
return "".to_string();
}
let bits_in_msb = width % Word::BITS;
let digits_in_msb = bits_in_msb.div_ceil(BITS_PER_HEX_DIGIT);
let mut out = String::with_capacity(width.div_ceil(BITS_PER_HEX_DIGIT) as usize);
let msb = values.last().unwrap();
for ii in (0..digits_in_msb).rev() {
let value = (msb >> (ii * BITS_PER_HEX_DIGIT)) & WORD_HEX_MASK;
Expand All @@ -73,18 +117,20 @@ pub(crate) fn to_hex_str(values: &[Word], width: WidthInt) -> String {
}

pub(crate) fn determine_width_from_str_radix(value: &str, radix: u32) -> WidthInt {
let starts_with_minus = value.starts_with('-');
let num_digits = match value.as_bytes() {
[] => 0,
[b'+' | b'-'] => 0,
[b'+' | b'-', digits @ ..] => digits.len() as WidthInt,
digits => digits.len() as WidthInt,
};

match radix {
let base_width = match radix {
2 => num_digits,
16 => num_digits * 4,
_ => todo!(),
}
};
base_width + starts_with_minus as WidthInt
}

#[inline]
Expand Down Expand Up @@ -114,29 +160,71 @@ pub(crate) fn from_str_radix(
return Ok(());
}

// treat string as bytes
let digits = value.as_bytes();

// check whether the string is negative
let (is_negative, digits) = match digits {
[b'+' | b'-'] => {
return Err(ParseIntError {
kind: IntErrorKind::InvalidDigit,
});
}
[b'+', rest @ ..] => (false, rest),
[b'-', rest @ ..] => (true, rest),
_ => (false, digits),
// remove any minus
let (is_negative, value) = match value.strip_prefix('-') {
Some(value) => (true, value),
None => (false, value),
};

match radix {
2 => parse_base_2(digits, out, width)?,
10 => parse_base_10(digits, out, width)?,
16 => parse_base_16(digits, out)?,
_ => todo!("Implement support for base {radix}. Currently the following bases are available: 2, 10, 16"),
};
if value.is_empty() {
return Err(ParseIntError {
kind: IntErrorKind::InvalidDigit,
});
}

// use Rust standard parsing infrastructure when the result needs to fit into a u64
if let [out] = out {
debug_assert!(width <= 64);
*out = match u64::from_str_radix(value, radix) {
Ok(v) => v,
Err(e) => {
let kind = match e.kind() {
std::num::IntErrorKind::NegOverflow | std::num::IntErrorKind::PosOverflow => {
IntErrorKind::ExceedsWidth
}
_ => IntErrorKind::InvalidDigit,
};
return Err(ParseIntError { kind });
}
};
} else {
debug_assert_eq!(width.div_ceil(Word::BITS) as usize, out.len());

// use our own custom implementation for larger sizes
// treat string as bytes
let digits = value.as_bytes();

// strip '+'
let digits = match digits {
[b'+'] => {
return Err(ParseIntError {
kind: IntErrorKind::InvalidDigit,
});
}
[b'+', rest @ ..] => rest,
_ => digits,
};

match radix {
2 => parse_base_2(digits, out, width)?,
10 => parse_base_10(digits, out, width)?,
16 => parse_base_16(digits, out)?,
_ => todo!("Implement support for base {radix}. Currently the following bases are available: 2, 10, 16"),
};
}

// TODO: check width
// let m = super::super::arithmetic::mask(width);
// if *out != *out & m {
// Err(ParseIntError {
// kind: IntErrorKind::ExceedsWidth,
// })
// } else {
// Ok(())
// }

if is_negative {
crate::bv::arithmetic::negate_in_place(out, width)
negate_in_place(out, width)
}
Ok(())
}
Expand Down Expand Up @@ -168,6 +256,7 @@ fn parse_base_10(
out: &mut [Word],
max_width: WidthInt,
) -> Result<WidthInt, ParseIntError> {
// let other = BitVecValue::
todo!()
}

Expand Down Expand Up @@ -218,15 +307,39 @@ mod tests {
use crate::bv::owned::value_vec_zeros;
use proptest::proptest;

fn do_test_from_to_bit_str(s: String) {
fn do_test_from_to_bit_str(s: &str) {
let words = s.len().div_ceil(Word::BITS as usize);
let mut out = vec![0; words];
let width = determine_width_from_str_radix(&s, 2);
assert_eq!(width as usize, s.len());
from_str_radix(&s, 2, &mut out, width).unwrap();

// test width determination function
let width = determine_width_from_str_radix(s, 2);
if s.starts_with('+') {
assert_eq!(width as usize, s.len() - 1);
} else {
assert_eq!(width as usize, s.len());
}

// do actual conversion
from_str_radix(s, 2, &mut out, width).unwrap();
crate::bv::arithmetic::assert_unused_bits_zero(&out, width);
let s_out = to_bit_str(&out, width);
assert_eq!(s, s_out);
let s_out = if s.starts_with('-') {
to_bit_str_signed(&out, width)
} else {
to_bit_str(&out, width)
};

// test for expected output
if let Some(without_plus) = s.strip_prefix('+') {
assert_eq!(without_plus, s_out);
} else if let Some(without_minus) = s.strip_prefix('-') {
if without_minus.chars().all(|c| c == '0') {
assert_eq!(without_minus, s_out);
} else {
assert_eq!(s, s_out);
}
} else {
assert_eq!(s, s_out);
}
}

#[test]
Expand All @@ -236,24 +349,61 @@ mod tests {
assert_eq!(to_bit_str(&input, 33), "0".repeat(33));
}

fn do_test_from_to_hex_str(s: String) {
let words = (s.len() * 4).div_ceil(Word::BITS as usize);
/// Runs the binary round-trip check on a few hand-picked signed inputs.
#[test]
fn test_from_to_bit_str_regression() {
    for input in ["+0", "-0", "-1", "-11"] {
        do_test_from_to_bit_str(input);
    }
}

/// Parses the hexadecimal string `s` and checks that printing the result reproduces it in
/// lower case.
///
/// A leading `+` is expected to disappear in the output, and a negative zero is expected
/// to be printed without its sign.
fn do_test_from_to_hex_str(s: &str) {
    // test width determination function
    let width = determine_width_from_str_radix(s, 16);
    if s.starts_with('+') {
        assert_eq!(width as usize, (s.len() - 1) * 4);
    } else if s.starts_with('-') {
        // one extra bit for the sign
        assert_eq!(width as usize, (s.len() - 1) * 4 + 1);
    } else {
        assert_eq!(width as usize, s.len() * 4);
    }

    // do actual conversion
    let words = width.div_ceil(Word::BITS) as usize;
    let mut out = vec![0; words];
    from_str_radix(s, 16, &mut out, width).unwrap();
    crate::bv::arithmetic::assert_unused_bits_zero(&out, width);
    let s_out = if s.starts_with('-') {
        to_hex_str_signed(&out, width)
    } else {
        to_hex_str(&out, width)
    };

    // test for expected output, which is always lower case
    let lower = s.to_ascii_lowercase();
    let expected = if let Some(without_plus) = lower.strip_prefix('+') {
        without_plus
    } else if let Some(without_minus) = lower.strip_prefix('-') {
        // negative zero is printed without the sign
        if without_minus.chars().all(|c| c == '0') {
            without_minus
        } else {
            lower.as_str()
        }
    } else {
        lower.as_str()
    };
    assert_eq!(expected, s_out);
}

/// Checks hex digit decoding for both cases and runs the hexadecimal round-trip check on a
/// few hand-picked inputs.
#[test]
fn test_from_to_hex_str_regression() {
    assert_eq!(hex_digit_value(b'a').unwrap(), 10);
    assert_eq!(hex_digit_value(b'A').unwrap(), 10);
    let inputs = [
        "a",
        "A",
        "0aaaA0a0aAA0aaaA",
        "+A",
        "0",
        "+aaaa0aa0aaaa0aaa00a0aaaaaa00aa00",
        "-aaaa00aaaaaaaaa0",
    ];
    for input in inputs {
        do_test_from_to_hex_str(input);
    }
}

#[test]
Expand All @@ -271,7 +421,10 @@ mod tests {
format!("{}aaaaaaaaaaaaaaaa", "0".repeat(9))
);
input[1] = 0xa4aa78;
assert_eq!(to_hex_str(&input, 6 * 4), "a4aa78aaaaaaaaaaaaaaaa");
assert_eq!(
to_hex_str(&input, 6 * 4 + Word::BITS),
"a4aa78aaaaaaaaaaaaaaaa"
);
// regressions test
let mut input = value_vec_zeros(64);
input[0] = 768603298337958570;
Expand All @@ -280,12 +433,12 @@ mod tests {

proptest! {
#[test]
fn test_from_to_bit_str(s in "[01]*") {
do_test_from_to_bit_str(s);
fn test_from_to_bit_str(s in "(([-+])?[01]+)|()") {
do_test_from_to_bit_str(&s);
}
#[test]
fn test_from_to_hex_str(s in "[01a-fA-F]*") {
do_test_from_to_hex_str(s);
fn test_from_to_hex_str(s in "(([-+])?[01a-fA-F]+)|()") {
do_test_from_to_hex_str(&s);
}
}
}
Loading

0 comments on commit e76bd01

Please sign in to comment.