From a468b30cb75babe268c249f13606cbaa60c14bb7 Mon Sep 17 00:00:00 2001 From: Robert Knight Date: Sun, 2 Feb 2025 21:42:40 +0000 Subject: [PATCH] Add missing `#[inline(always)]` to `Quantize` SIMD op Fix calls to SIMD intrinsics not being inlined. Also add `target_feature` to an AVX2 SIMD impl for consistency with other methods. --- rten-simd/src/arch/x86_64.rs | 1 + rten-vecmath/src/quantize.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/rten-simd/src/arch/x86_64.rs b/rten-simd/src/arch/x86_64.rs index 2cb6f2e5..4437a85e 100644 --- a/rten-simd/src/arch/x86_64.rs +++ b/rten-simd/src/arch/x86_64.rs @@ -171,6 +171,7 @@ impl SimdInt for __m256i { } #[inline] + #[target_feature(enable = "avx2")] unsafe fn saturating_cast_u8(self) -> impl Simd { use std::arch::x86_64::{ __m128i, _mm256_castsi256_si128, _mm256_packus_epi16, _mm256_packus_epi32, diff --git a/rten-vecmath/src/quantize.rs b/rten-vecmath/src/quantize.rs index 3caffb74..6eec6a9a 100644 --- a/rten-vecmath/src/quantize.rs +++ b/rten-vecmath/src/quantize.rs @@ -38,6 +38,7 @@ impl<'s, 'd, To> Quantize<'s, 'd, To> { impl<'d> SimdOp for Quantize<'_, 'd, u8> { type Output = &'d mut [u8]; + #[inline(always)] unsafe fn eval(self) -> Self::Output { let mut n = self.src.len(); let mut src_ptr = self.src.as_ptr();