document new unsafe blocks in unsafe functions
ctz committed Feb 24, 2025
1 parent 1813d3b commit 68fe8c6
Showing 18 changed files with 107 additions and 37 deletions.
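Every change below follows the same pattern: each `unsafe {}` block that sits inside an already-`unsafe fn` gains a `// SAFETY:` comment directly above it, pointing at the crate's intrinsics-safety notes (presumably because, under the `unsafe_op_in_unsafe_fn` lint that Rust 2024 enables by default, the body of an `unsafe fn` no longer supplies an implicit unsafe context). A minimal sketch of the resulting shape — the function and intrinsic here are illustrative, not taken from the diff:

```rust
#[cfg(target_arch = "aarch64")]
use core::arch::aarch64::{uint8x16_t, vld1q_u8};

/// # Safety
/// The caller must ensure the `neon` target feature is available.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn load_block(bytes: &[u8; 16]) -> uint8x16_t {
    // SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
    // the pointer comes from a &[u8; 16], so 16 bytes are readable.
    unsafe { vld1q_u8(bytes.as_ptr()) }
}
```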
8 changes: 8 additions & 0 deletions graviola/src/low/aarch64/aes.rs
@@ -47,6 +47,7 @@ impl AesKey {

#[target_feature(enable = "aes,neon")]
unsafe fn _ctr(&self, initial_counter: &[u8; 16], cipher_inout: &mut [u8]) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
// counter and inc are big endian, so must be vrev32q_u8'd before use
let counter = vld1q_u8(initial_counter.as_ptr().cast());
@@ -248,6 +249,7 @@ fn sub_word(w: u32) -> u32 {

#[target_feature(enable = "aes")]
unsafe fn _sub_word(w: u32) -> u32 {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
// we have the `aese` instruction, which is
// `sub_word(shift_rows(w), S)`. however, fortunately
@@ -268,6 +270,7 @@ const RCON: [u32; 10] = [0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0

#[target_feature(enable = "aes")]
unsafe fn aes128_block(round_keys: &[uint8x16_t; 11], block_inout: &mut [u8]) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let block = vld1q_u8(block_inout.as_ptr() as *const _);
let block = _aes128_block(round_keys, block);
@@ -278,6 +281,7 @@ unsafe fn aes128_block(round_keys: &[uint8x16_t; 11], block_inout: &mut [u8]) {
#[target_feature(enable = "aes")]
#[inline]
unsafe fn _aes128_block(round_keys: &[uint8x16_t; 11], block: uint8x16_t) -> uint8x16_t {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let block = vaeseq_u8(block, round_keys[0]);
let block = vaesmcq_u8(block);
@@ -346,6 +350,7 @@ unsafe fn _aes128_8_blocks(
uint8x16_t,
uint8x16_t,
) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
round_8!(b0, b1, b2, b3, b4, b5, b6, b7, round_keys[0]);
round_8!(b0, b1, b2, b3, b4, b5, b6, b7, round_keys[1]);
@@ -380,6 +385,7 @@ unsafe fn _aes128_8_blocks(

#[target_feature(enable = "aes")]
unsafe fn aes256_block(round_keys: &[uint8x16_t; 15], block_inout: &mut [u8]) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let block = vld1q_u8(block_inout.as_ptr() as *const _);
let block = _aes256_block(round_keys, block);
@@ -390,6 +396,7 @@ unsafe fn aes256_block(round_keys: &[uint8x16_t; 15], block_inout: &mut [u8]) {
#[target_feature(enable = "aes")]
#[inline]
unsafe fn _aes256_block(round_keys: &[uint8x16_t; 15], block: uint8x16_t) -> uint8x16_t {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let block = vaeseq_u8(block, round_keys[0]);
let block = vaesmcq_u8(block);
@@ -444,6 +451,7 @@ unsafe fn _aes256_8_blocks(
uint8x16_t,
uint8x16_t,
) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
round_8!(b0, b1, b2, b3, b4, b5, b6, b7, round_keys[0]);
round_8!(b0, b1, b2, b3, b4, b5, b6, b7, round_keys[1]);
2 changes: 2 additions & 0 deletions graviola/src/low/aarch64/bignum_point_select_p256.rs
@@ -25,6 +25,7 @@ pub(crate) fn bignum_jac_point_select_p256(z: &mut [u64; 12], table: &[u64], ind

#[target_feature(enable = "neon")]
unsafe fn _select_aff_p256(z: &mut [u64; 8], table: &[u64], index: u8) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
// SAFETY: u128 and uint32x4_t have same size and meaning
let mut acc0: uint32x4_t = mem::transmute(0u128);
@@ -65,6 +66,7 @@ unsafe fn _select_aff_p256(z: &mut [u64; 8], table: &[u64], index: u8) {

#[target_feature(enable = "neon")]
unsafe fn _select_jac_p256(z: &mut [u64; 12], table: &[u64], index: u8) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let mut acc0: uint32x4_t = mem::transmute(0u128);
let mut acc1 = acc0;
1 change: 1 addition & 0 deletions graviola/src/low/aarch64/bignum_point_select_p384.rs
@@ -14,6 +14,7 @@ pub(crate) fn bignum_jac_point_select_p384(z: &mut [u64; 18], table: &[u64], ind

#[target_feature(enable = "neon")]
unsafe fn _select_jac_p384(z: &mut [u64; 18], table: &[u64], index: u8) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let mut acc0: uint32x4_t = mem::transmute(0u128);
let mut acc1 = acc0;
3 changes: 2 additions & 1 deletion graviola/src/low/aarch64/cpu.rs
@@ -16,8 +16,8 @@ pub(crate) fn leave_cpu_state(old: u32) {

#[target_feature(enable = "neon")]
unsafe fn zero_neon_registers() {
// SAFETY: inline assembly. all written registers are listed as clobbers.
unsafe {
// SAFETY: inline assembly. all written registers are listed as clobbers.
core::arch::asm!(
" eor v0.16b, v0.16b, v0.16b",
" eor v1.16b, v1.16b, v1.16b",
@@ -131,6 +131,7 @@ pub(in crate::low) fn zero_bytes(ptr: *mut u8, len: usize) {
/// # Safety
/// The caller must ensure that there are `len` bytes readable at `a` and `b`,
pub(in crate::low) unsafe fn ct_compare_bytes(a: *const u8, b: *const u8, len: usize) -> u8 {
// SAFETY: inline assembly.
unsafe {
let mut acc = 0u8;
core::arch::asm!(
6 changes: 6 additions & 0 deletions graviola/src/low/aarch64/ghash.rs
@@ -197,6 +197,7 @@ macro_rules! reduce {

#[target_feature(enable = "neon,aes")]
unsafe fn _mul(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let (mut lo, mut mi, mut hi) = (zero(), zero(), zero());
let bx = xor_halves(b);
@@ -217,6 +218,7 @@ unsafe fn _mul8(
g: uint64x2_t,
h: uint64x2_t,
) -> uint64x2_t {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let (mut lo, mut mi, mut hi) = (zero(), zero(), zero());
mul!(lo, mi, hi, a, table.powers[7], table.powers_xor[7]);
@@ -233,6 +235,7 @@ unsafe fn _mul8(

#[target_feature(enable = "neon")]
unsafe fn xor_halves(h: uint64x2_t) -> uint64x2_t {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let hx = vextq_u64(h, h, 1);
veorq_u64(hx, h)
@@ -241,6 +244,7 @@ unsafe fn xor_halves(h: uint64x2_t) -> uint64x2_t {

#[target_feature(enable = "neon")]
unsafe fn gf128_big_endian(h: uint64x2_t) -> uint64x2_t {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
// takes a raw hash subkey, and arranges that it can
// be used in big endian ordering.
@@ -267,6 +271,7 @@ unsafe fn gf128_big_endian(h: uint64x2_t) -> uint64x2_t {
#[inline]
#[target_feature(enable = "neon,aes")]
unsafe fn vmull_p64_fix(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let a = vgetq_lane_u64::<0>(a);
let b = vgetq_lane_u64::<0>(b);
@@ -277,6 +282,7 @@ unsafe fn vmull_p64_fix(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
#[inline]
#[target_feature(enable = "neon,aes")]
unsafe fn vmull_high_p64_fix(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let a = vgetq_lane_u64::<1>(a);
let b = vgetq_lane_u64::<1>(b);
1 change: 1 addition & 0 deletions graviola/src/low/aarch64/sha256.rs
@@ -37,6 +37,7 @@ macro_rules! round {

#[target_feature(enable = "neon,sha2")]
unsafe fn sha256(state: &mut [u32; 8], blocks: &[u8]) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let mut state0 = vld1q_u32(state[0..4].as_ptr());
let mut state1 = vld1q_u32(state[4..8].as_ptr());
17 changes: 17 additions & 0 deletions graviola/src/low/inline_assembly_safety.rs
@@ -71,3 +71,20 @@
//! For this reason, it is unsound if `ret` is called in the outer
//! frame. However, our inline assembly can contain leaf internal
//! functions: these may `ret` back to the outer frame.
//!
//! # Safety of intrinsics
//!
//! The above section "Using unsupported instructions" also applies
//! to intrinsics, and the same arrangements exist to avoid ever
//! issuing an unsupported instruction.
//!
//! In general, intrinsics are less hazardous to use than inline
//! assembly. However, since they are intended to be drop-in
//! replacements for their counterparts in C/C++, they are less
//! Rust-friendly than they could otherwise be. For example,
//! an analog of `_mm_loadu_si128` could take `&[u8; 16]` as its
//! argument, rather than a pointer. That would externalise the
//! requirements on that function, and allow it to be safe
//! (though only if the `sse2` `target_feature` were statically
//! guaranteed at compile-time; non-byte types would additionally
//! require safe-transmute to be available).
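As an illustration of the hypothetical raised above (not an API that exists in `core::arch` or in this crate), a reference-taking wrapper can be written safely on x86_64 because `sse2` is part of that target's baseline feature set:

```rust
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::{__m128i, _mm_loadu_si128};

/// A reference-taking analog of `_mm_loadu_si128`, per the sketch above.
#[cfg(target_arch = "x86_64")]
fn loadu_si128(bytes: &[u8; 16]) -> __m128i {
    // SAFETY: `&[u8; 16]` guarantees 16 readable bytes; the load is
    // unaligned, and `sse2` is statically guaranteed on x86_64 targets.
    unsafe { _mm_loadu_si128(bytes.as_ptr().cast()) }
}
```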
4 changes: 4 additions & 0 deletions graviola/src/low/x86_64/aes.rs
@@ -140,6 +140,7 @@ macro_rules! expand_128 {

#[target_feature(enable = "aes,avx")]
unsafe fn aes128_expand(key: &[u8; 16], out: &mut [__m128i; 11]) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let mut t1 = _mm_lddqu_si128(key.as_ptr() as *const _);
out[0] = t1;
@@ -195,6 +196,7 @@ macro_rules! expand_256 {

#[target_feature(enable = "aes,avx")]
unsafe fn aes256_expand(key: &[u8; 32], out: &mut [__m128i; 15]) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let mut t1 = _mm_lddqu_si128(key.as_ptr() as *const _);
let mut t3 = _mm_lddqu_si128(key[16..].as_ptr() as *const _);
@@ -221,6 +223,7 @@ unsafe fn aes256_expand(key: &[u8; 32], out: &mut [__m128i; 15]) {

#[target_feature(enable = "aes,avx")]
unsafe fn aes128_block(round_keys: &[__m128i; 11], block_inout: &mut [u8]) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let block = _mm_lddqu_si128(block_inout.as_ptr() as *const _);
let block = _mm_xor_si128(block, round_keys[0]);
@@ -240,6 +243,7 @@ unsafe fn aes128_block(round_keys: &[__m128i; 11], block_inout: &mut [u8]) {

#[target_feature(enable = "aes,avx")]
unsafe fn aes256_block(round_keys: &[__m128i; 15], block_inout: &mut [u8]) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let block = _mm_lddqu_si128(block_inout.as_ptr() as *const _);
let block = _mm_xor_si128(block, round_keys[0]);
59 changes: 33 additions & 26 deletions graviola/src/low/x86_64/aes_gcm.rs
@@ -39,15 +39,16 @@ unsafe fn _cipher<const ENC: bool>(
aad: &[u8],
cipher_inout: &mut [u8],
) {
unsafe {
ghash.add(aad);
ghash.add(aad);

let (rk_first, rks, rk_last) = key.round_keys();
let (rk_first, rks, rk_last) = key.round_keys();

let mut counter = Counter::new(initial_counter);
let mut by8_iter = cipher_inout.chunks_exact_mut(128);
let mut counter = Counter::new(initial_counter);
let mut by8_iter = cipher_inout.chunks_exact_mut(128);

for blocks in by8_iter.by_ref() {
for blocks in by8_iter.by_ref() {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
// prefetch to avoid any stall later
_mm_prefetch(blocks.as_ptr().add(0) as *const _, _MM_HINT_T0);
_mm_prefetch(blocks.as_ptr().add(64) as *const _, _MM_HINT_T0);
@@ -135,18 +136,21 @@ unsafe fn _cipher<const ENC: bool>(
let a1 = _mm_xor_si128(ghash.current, a1);
ghash.current = ghash::_mul8(ghash.table, a1, a2, a3, a4, a5, a6, a7, a8);
}
}

let cipher_inout = by8_iter.into_remainder();
let cipher_inout = by8_iter.into_remainder();

if !ENC {
ghash.add(cipher_inout);
}
if !ENC {
ghash.add(cipher_inout);
}

{
let mut blocks_iter = cipher_inout.chunks_exact_mut(16);
for block in blocks_iter.by_ref() {
let c1 = counter.next();
{
let mut blocks_iter = cipher_inout.chunks_exact_mut(16);
for block in blocks_iter.by_ref() {
let c1 = counter.next();

// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let mut c1 = _mm_xor_si128(c1, rk_first);

for rk in rks {
@@ -159,16 +163,19 @@

_mm_storeu_si128(block.as_mut_ptr() as *mut _, c1);
}
}

let cipher_inout = blocks_iter.into_remainder();
if !cipher_inout.is_empty() {
let mut block = [0u8; 16];
let len = cipher_inout.len();
debug_assert!(len < 16);
block[..len].copy_from_slice(cipher_inout);
let cipher_inout = blocks_iter.into_remainder();
if !cipher_inout.is_empty() {
let mut block = [0u8; 16];
let len = cipher_inout.len();
debug_assert!(len < 16);
block[..len].copy_from_slice(cipher_inout);

let c1 = counter.next();
let c1 = counter.next();

// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let mut c1 = _mm_xor_si128(c1, rk_first);

for rk in rks {
@@ -181,15 +188,15 @@ unsafe fn _cipher<const ENC: bool>(
let c1 = _mm_xor_si128(c1, p1);

_mm_storeu_si128(block.as_mut_ptr() as *mut _, c1);

cipher_inout.copy_from_slice(&block[..len]);
}
}

if ENC {
ghash.add(cipher_inout);
cipher_inout.copy_from_slice(&block[..len]);
}
}

if ENC {
ghash.add(cipher_inout);
}
}

/// This stores the next counter value, in big endian.
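aes_gcm.rs is the one file where the commit does more than add comments: the single `unsafe {}` that wrapped the whole of `_cipher` is narrowed so that iterator setup, the `ghash.add` calls and the slice bookkeeping run as ordinary safe code, and only the intrinsic-heavy stretches stay wrapped. A schematic of that narrowing, using made-up helper names rather than the real round functions:

```rust
/// Hypothetical stand-in for the per-block AES work; in the real code this
/// is a run of `_mm_xor_si128`/`_mm_aesenc_si128` calls, and the keystream
/// is derived from a fresh counter block on every iteration.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "aes,avx")]
unsafe fn encrypt_block(block: &mut [u8; 16], keystream: &[u8; 16]) {
    for (b, k) in block.iter_mut().zip(keystream.iter()) {
        *b ^= *k;
    }
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "aes,avx")]
unsafe fn cipher_blocks(cipher_inout: &mut [u8], keystream: &[u8; 16]) {
    // safe bookkeeping lives outside any unsafe block
    let mut blocks_iter = cipher_inout.chunks_exact_mut(16);
    for block in blocks_iter.by_ref() {
        // SAFETY: intrinsics. see
        // [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
        unsafe {
            encrypt_block(block.try_into().unwrap(), keystream);
        }
    }
    let _ragged_tail = blocks_iter.into_remainder(); // handled separately in the real code
}
```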
graviola/src/low/x86_64/bignum_copy_row_from_table_16_avx2.rs
@@ -18,6 +18,7 @@ pub(crate) fn bignum_copy_row_from_table_16_avx2(

#[target_feature(enable = "avx,avx2")]
unsafe fn _bignum_copy_row_from_table_16_avx2(z: &mut [u64], table: &[u64], index: u64) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
// SAFETY: prefetches do not fault and are not architecturally visible
_mm_prefetch(table.as_ptr().cast(), _MM_HINT_T0);
@@ -41,7 +42,7 @@ unsafe fn _bignum_copy_row_from_table_16_avx2(z: &mut [u64], table: &[u64], inde
let mask = _mm256_cmpeq_epi64(index, desired_index);
index = _mm256_add_epi64(index, ones);

// SAFETY: `row` is exactly 16 words; `loadu` does relaxes 256-bit alignment req.
// SAFETY: `row` is exactly 16 words; `loadu` relaxes 256-bit alignment req.
let row0 = _mm256_loadu_si256(row.as_ptr().add(0).cast());
let row1 = _mm256_loadu_si256(row.as_ptr().add(4).cast());
let row2 = _mm256_loadu_si256(row.as_ptr().add(8).cast());
graviola/src/low/x86_64/bignum_copy_row_from_table_8n_avx2.rs
@@ -25,6 +25,7 @@ unsafe fn _bignum_copy_row_from_table_8n_avx2(
width: u64,
index: u64,
) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
// SAFETY: prefetches do not fault and are not architecturally visible
_mm_prefetch(table.as_ptr().cast(), _MM_HINT_T0);
2 changes: 2 additions & 0 deletions graviola/src/low/x86_64/bignum_point_select_p256.rs
@@ -24,6 +24,7 @@ pub(crate) fn bignum_jac_point_select_p256(z: &mut [u64; 12], table: &[u64], ind

#[target_feature(enable = "avx,avx2")]
unsafe fn _select_aff_p256(z: &mut [u64; 8], table: &[u64], index: u8) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
// SAFETY: prefetches do not fault and are not architecturally visible
_mm_prefetch(table.as_ptr().cast(), _MM_HINT_T0);
@@ -61,6 +62,7 @@ unsafe fn _select_aff_p256(z: &mut [u64; 8], table: &[u64], index: u8) {

#[target_feature(enable = "avx,avx2")]
unsafe fn _select_jac_p256(z: &mut [u64; 12], table: &[u64], index: u8) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
// SAFETY: prefetches do not fault and are not architecturally visible
_mm_prefetch(table.as_ptr().cast(), _MM_HINT_T0);
1 change: 1 addition & 0 deletions graviola/src/low/x86_64/bignum_point_select_p384.rs
@@ -13,6 +13,7 @@ pub(crate) fn bignum_jac_point_select_p384(z: &mut [u64; 18], table: &[u64], ind

#[target_feature(enable = "avx,avx2")]
unsafe fn _select_jac_p384(z: &mut [u64; 18], table: &[u64], index: u8) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
_mm_prefetch(table.as_ptr().cast(), _MM_HINT_T0);
_mm_prefetch(table.as_ptr().add(16).cast(), _MM_HINT_T0);
4 changes: 4 additions & 0 deletions graviola/src/low/x86_64/chacha20.rs
@@ -98,6 +98,7 @@ macro_rules! rotate_left_128 {

#[target_feature(enable = "ssse3,avx2")]
unsafe fn format_key(key: &[u8; 32], nonce: &[u8; 16]) -> ChaCha20 {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let z07 = _mm256_set_m128i(
_mm_lddqu_si128(SIGMA.as_ptr().cast()),
@@ -115,6 +116,7 @@ unsafe fn format_key(key: &[u8; 32], nonce: &[u8; 16]) -> ChaCha20 {
/// Computes 8 blocks. Does _NOT_ handle ragged output.
#[target_feature(enable = "avx2")]
unsafe fn core_8x(t07: __m256i, z8f: &mut __m256i, xor_out_512: &mut [u8]) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let t8f = *z8f;
*z8f = _mm256_add_epi32(*z8f, _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, 8));
@@ -355,6 +357,7 @@ unsafe fn core_8x(t07: __m256i, z8f: &mut __m256i, xor_out_512: &mut [u8]) {
/// be 0..64 bytes).
#[target_feature(enable = "avx2")]
unsafe fn core_2x(t07: __m256i, z8f: &mut __m256i, xor_out: &mut [u8]) {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let t8f = *z8f;
let blocks_used = if xor_out.len() > 32 { 2 } else { 1 };
@@ -457,6 +460,7 @@ unsafe fn core_2x(t07: __m256i, z8f: &mut __m256i, xor_out: &mut [u8]) {

#[target_feature(enable = "ssse3,avx2")]
unsafe fn hchacha(key: &[u8; 32], nonce: &[u8; 24]) -> ChaCha20 {
// SAFETY: intrinsics. see [crate::low::inline_assembly_safety#safety-of-intrinsics] for safety info.
unsafe {
let mut z03 = _mm_lddqu_si128(SIGMA.as_ptr().cast());
let mut z47 = _mm_lddqu_si128(key[0..16].as_ptr().cast());