Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make Simd::to_array impls a simple transmute #563

Merged
merged 1 commit into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 7 additions & 14 deletions rten-simd/src/arch/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@ use std::arch::aarch64::{
vcleq_s32, vcltq_f32, vcltq_s32, vcombine_s16, vcvtnq_s32_f32, vcvtq_s32_f32, vdivq_f32,
vdupq_n_f32, vdupq_n_s32, veorq_s32, vfmaq_f32, vld1q_f32, vld1q_s32, vld1q_u32, vmaxq_f32,
vmaxq_s32, vminq_f32, vminq_s32, vmulq_f32, vmulq_s32, vqmovn_s32, vqmovun_s16,
vreinterpretq_f32_s32, vshlq_n_s32, vst1q_f32, vst1q_s32, vst1q_u32, vsubq_f32, vsubq_s32,
};

use core::arch::aarch64::{
vreinterpretq_s16_s32, vreinterpretq_s32_s16, vreinterpretq_s32_s8, vreinterpretq_s8_s32,
vzip1q_s16, vzip1q_s8, vzip2q_s16, vzip2q_s8,
vreinterpretq_f32_s32, vreinterpretq_s16_s32, vreinterpretq_s32_s16, vreinterpretq_s32_s8,
vreinterpretq_s8_s32, vshlq_n_s32, vst1q_f32, vst1q_s32, vsubq_f32, vsubq_s32, vzip1q_s16,
vzip1q_s8, vzip2q_s16, vzip2q_s8,
};
use std::mem::transmute;

use crate::{Simd, SimdFloat, SimdInt, SimdMask};

Expand All @@ -30,8 +28,7 @@ impl SimdMask for uint32x4_t {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0; 4];
vst1q_u32(array.as_mut_ptr(), self);
let array = transmute::<Self, [u32; 4]>(self);
std::array::from_fn(|i| array[i] != 0)
}
}
Expand Down Expand Up @@ -70,9 +67,7 @@ impl Simd for int32x4_t {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0; Self::LEN];
self.store(array.as_mut_ptr());
array
transmute::<Self, Self::Array>(self)
}
}

Expand Down Expand Up @@ -225,9 +220,7 @@ impl Simd for float32x4_t {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0.; Self::LEN];
self.store(array.as_mut_ptr());
array
transmute::<Self, Self::Array>(self)
}
}

Expand Down
13 changes: 5 additions & 8 deletions rten-simd/src/arch/wasm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use std::arch::wasm32::{
#[cfg(target_feature = "relaxed-simd")]
use std::arch::wasm32::f32x4_relaxed_madd;

use std::mem::transmute;

use crate::{Simd, SimdFloat, SimdInt, SimdMask};

/// Wrapper around a WASM v128 type that marks it as containing integers.
Expand Down Expand Up @@ -41,8 +43,7 @@ impl SimdMask for v128i {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0; Self::LEN];
self.store(array.as_mut_ptr());
let array = transmute::<v128, [u32; 4]>(self.0);
std::array::from_fn(|i| array[i] != 0)
}
}
Expand Down Expand Up @@ -76,9 +77,7 @@ impl Simd for v128i {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0; Self::LEN];
self.store(array.as_mut_ptr());
array
transmute::<v128, Self::Array>(self.0)
}
}

Expand Down Expand Up @@ -249,9 +248,7 @@ impl Simd for v128f {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0.; Self::LEN];
self.store(array.as_mut_ptr());
array
transmute::<v128, Self::Array>(self.0)
}
}

Expand Down
19 changes: 5 additions & 14 deletions rten-simd/src/arch/x86_64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ impl SimdMask for __m256i {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0; Self::LEN];
self.store(array.as_mut_ptr());
let array = <Self as Simd>::to_array(self);
std::array::from_fn(|i| array[i] != 0)
}
}
Expand Down Expand Up @@ -93,9 +92,7 @@ impl Simd for __m256i {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0; Self::LEN];
self.store(array.as_mut_ptr());
array
transmute::<Self, Self::Array>(self)
}
}

Expand Down Expand Up @@ -292,9 +289,7 @@ impl Simd for __m256 {

#[inline]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0.; Self::LEN];
self.store(array.as_mut_ptr());
array
transmute::<Self, Self::Array>(self)
}
}

Expand Down Expand Up @@ -494,9 +489,7 @@ impl Simd for __m512i {
#[inline]
#[target_feature(enable = "avx512f")]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0; Self::LEN];
self.store(array.as_mut_ptr());
array
transmute::<Self, Self::Array>(self)
}
}

Expand Down Expand Up @@ -699,9 +692,7 @@ impl Simd for __m512 {
#[inline]
#[target_feature(enable = "avx512f")]
unsafe fn to_array(self) -> Self::Array {
let mut array = [0.; Self::LEN];
self.store(array.as_mut_ptr());
array
transmute::<Self, Self::Array>(self)
}
}

Expand Down
8 changes: 8 additions & 0 deletions rten-simd/src/vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,10 @@ pub trait Simd: Copy + Sized {
}

/// Return the contents of this vector as an array.
///
/// This is a cheap transmute for most implementations because the SIMD
/// type and the array have the same layout. The converse (treating an
/// array as the SIMD type) does not hold in general, because the SIMD
/// type may have greater alignment than the array.
unsafe fn to_array(self) -> Self::Array;

/// Return a new vector with all elements set to zero.
Expand Down Expand Up @@ -176,6 +180,10 @@ pub trait SimdMask: Copy {
}

/// Convert this SIMD mask to a boolean array.
///
/// Unlike [`Simd::to_array`], this is not a simple transmute because
/// each element must be converted from the architecture-specific
/// representation of a mask to a `bool`.
unsafe fn to_array(self) -> Self::Array;

/// Create a SIMD mask from a boolean array.
Expand Down