Skip to content

Commit

Permalink
Test, improvements on SSE, AVX
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Sep 25, 2024
1 parent 2ff92da commit 841ab4e
Show file tree
Hide file tree
Showing 38 changed files with 1,418 additions and 185 deletions.
11 changes: 0 additions & 11 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 8 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,17 @@ M3 Pro, NEON dilation RGBA image 2731x4096 with specified kernel size

SSE dilation RGB image 2731x4096 with specified kernel size

| SE | 9x9 | 15x15 | 21x21 | 41x41 | 61x61 |
|--------|---------|----------|----------|----------|-------|
| FM | 84.19ms | 186.53ms | 254.70ms | 673.45ms | 1.37s |
| OpenCV | 28.61ms | 62.43ms | 114.80ms | 428.87ms | 1.16s |
| SE | 9x9 | 15x15 | 21x21 | 41x41 | 61x61 |
|--------|---------|---------|----------|----------|----------|
| FM | 30.71ms | 34.87ms | 39.93ms | 81.56ms | 149.37ms |
| OpenCV | 27.36ms | 63.05ms | 112.54ms | 419.40ms | 1.08s |

SSE dilation RGBA image 2731x4096 with specified kernel size

| SE | 9x9 | 15x15 | 21x21 | 41x41 | 61x61 |
|--------|----------|----------|----------|----------|-------|
| FM | 109.37ms | 229.11ms | 329.31ms | 981.48ms | 2.05s |
| OpenCV | 39.20ms | 76.09ms | 149.12ms | 569.36ms | 1.33s |
| SE | 9x9 | 15x15 | 21x21 | 41x41 | 61x61 |
|--------|---------|---------|----------|----------|----------|
| FM | 45.03ms | 49.03ms | 56.40ms | 114.72ms | 206.05ms |
| OpenCV | 35.50ms | 79.60ms | 147.32ms | 556.56ms | 1.33s |

This project is licensed under either of

Expand Down
2 changes: 1 addition & 1 deletion app/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ edition = "2021"
image = "0.25.2"
fast_morphology = {path = "../"}
imageproc = "0.25.0"
opencv = {version = "0.93.0", features = ["imgproc", "clang-runtime"]}
opencv = {version = "0.93.0", features = ["imgproc"]}

[dev-dependencies]
criterion = {version = "0.5.1", features = ["html_reports"]}
Expand Down
22 changes: 11 additions & 11 deletions app/benches/dilation/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,19 +316,19 @@ pub fn criterion_benchmark(c: &mut Criterion) {
opencv::core::set_use_ipp(false).expect("Failed to disable IPP");
opencv::core::set_use_optimized(false).expect("Failed to disable opts");

// exec_bench_rgb(c, 4);
// exec_bench_rgb(c, 7);
// exec_bench_rgb(c, 10);
// exec_bench_rgb(c, 20);
// exec_bench_rgb(c, 30);
exec_bench_rgb(c, 4);
exec_bench_rgb(c, 7);
exec_bench_rgb(c, 10);
exec_bench_rgb(c, 20);
exec_bench_rgb(c, 30);
//
exec_bench_rgba(c, 4);
exec_bench_rgba(c, 7);
exec_bench_rgba(c, 10);
exec_bench_rgba(c, 20);
exec_bench_rgba(c, 30);
// exec_bench_rgba(c, 4);
// exec_bench_rgba(c, 7);
// exec_bench_rgba(c, 10);
// exec_bench_rgba(c, 20);
// exec_bench_rgba(c, 30);

exec_bench_gray(c, 4);
// exec_bench_gray(c, 4);
// exec_bench_gray(c, 7);
// exec_bench_gray(c, 10);
// exec_bench_gray(c, 20);
Expand Down
13 changes: 6 additions & 7 deletions app/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
use fast_morphology::{
dilate, dilate_rgb, BorderMode, ImageSize, KernelShape, MorphologyThreadingPolicy,
dilate, dilate_rgb, dilate_rgba, BorderMode, ImageSize, KernelShape, MorphologyThreadingPolicy,
};
use image::{EncodableLayout, GenericImageView, ImageReader};
use opencv::core::{
Mat, MatTrait, MatTraitConstManual, Point, Scalar,
BORDER_REPLICATE, CV_8U, CV_8UC3,
Mat, MatTrait, MatTraitConstManual, Point, Scalar, BORDER_REPLICATE, CV_8U, CV_8UC3,
};
use opencv::imgproc;
use std::time::Instant;
Expand Down Expand Up @@ -164,11 +163,11 @@ fn main() {
}

let rgba_image = transient_rgba.as_bytes();
let mut dst = vec![0u8; saved_origin.len()];
let mut dst = vec![0u8; rgba_image.len()];

let exec_time = Instant::now();
dilate_rgb(
&saved_origin,
dilate_rgba(
&rgba_image,
&mut dst,
image_size,
&structuring_element,
Expand Down Expand Up @@ -237,7 +236,7 @@ fn main() {
&dst,
dimensions.0,
dimensions.1,
image::ColorType::Rgb8,
image::ColorType::Rgba8,
)
.unwrap();

Expand Down
68 changes: 60 additions & 8 deletions src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,17 @@
*/
use crate::filter_op_declare::{Arena, MorthOpFilterFlat2DRow};
use crate::flat_se::AnalyzedSe;
use crate::morph_base::MorphNativeOp;
use crate::op_type::MorphOp;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use crate::ops::avx::MorphOpFilterAvx2DRow;
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
use crate::ops::neon::MorphOpFilterNeon2DRow;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use crate::ops::sse::{MorphOpFilterSse2DRow};
use crate::ops::sse::{MorphOpFilterSse2DRow, MorphOpFilterSse2DRowF32, MorphOpFilterSse2DRowU16};
use crate::ops::MorphFilterFlat2DRow;
use crate::unsafe_slice::UnsafeSlice;
use crate::ImageSize;
use crate::morph_base::MorphNativeOp;

pub struct MorthFilterFlat2DRow<T>
where
Expand Down Expand Up @@ -78,9 +80,34 @@ where
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
_result = Box::new(
MorphOpFilterSse2DRow::<{ MorphOp::Dilate as u8 }>::default(),
);
if std::arch::is_x86_feature_detected!("sse4.1") {
_result = Box::new(MorphOpFilterSse2DRow::<
{ MorphOp::Dilate as u8 },
>::default());
}
if std::arch::is_x86_feature_detected!("avx2") {
_result = Box::new(MorphOpFilterAvx2DRow::<
{ MorphOp::Dilate as u8 },
>::default());
}
}
} else if std::any::type_name::<T>() == "u16" {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if std::arch::is_x86_feature_detected!("sse4.1") {
_result = Box::new(MorphOpFilterSse2DRowU16::<
{ MorphOp::Dilate as u8 },
>::default());
}
}
} else if std::any::type_name::<T>() == "f32" {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if std::arch::is_x86_feature_detected!("sse4.1") {
_result = Box::new(MorphOpFilterSse2DRowF32::<
{ MorphOp::Dilate as u8 },
>::default());
}
}
}
_result
Expand All @@ -97,9 +124,34 @@ where
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
_result = Box::new(
MorphOpFilterSse2DRow::<{ MorphOp::Erode as u8 }>::default(),
);
if std::arch::is_x86_feature_detected!("sse4.1") {
_result = Box::new(
MorphOpFilterSse2DRow::<{ MorphOp::Erode as u8 }>::default(),
);
}
if std::arch::is_x86_feature_detected!("avx2") {
_result = Box::new(
MorphOpFilterAvx2DRow::<{ MorphOp::Erode as u8 }>::default(),
);
}
}
} else if std::any::type_name::<T>() == "u16" {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if std::arch::is_x86_feature_detected!("sse4.1") {
_result = Box::new(MorphOpFilterSse2DRowU16::<
{ MorphOp::Erode as u8 },
>::default());
}
}
} else if std::any::type_name::<T>() == "f32" {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if std::arch::is_x86_feature_detected!("sse4.1") {
_result = Box::new(MorphOpFilterSse2DRowF32::<
{ MorphOp::Erode as u8 },
>::default());
}
}
}
_result
Expand Down
2 changes: 1 addition & 1 deletion src/morph_base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,4 @@ impl MorphNativeOp<f32> for f32 {
MorphOp::Erode => (*self).min(other),
}
}
}
}
9 changes: 1 addition & 8 deletions src/morph_rgb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,7 @@ pub(crate) unsafe fn make_morphology_rgb<T, const OP_TYPE: u8>(
threading_policy: MorphologyThreadingPolicy,
) -> Result<(), String>
where
T: RgbPackable<T>
+ Copy
+ 'static
+ Sync
+ Send
+ Clone
+ Default
+ MorphNativeOp<T>,
T: RgbPackable<T> + Copy + 'static + Sync + Send + Clone + Default + MorphNativeOp<T>,
{
let unpacked = T::unpack(src, image_size);
let mut dst_unpacked = UnpackedRgbImage::alloc(image_size);
Expand Down
9 changes: 1 addition & 8 deletions src/morph_rgba.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,7 @@ pub(crate) unsafe fn make_morphology_rgba<T, const OP_TYPE: u8>(
threading_policy: MorphologyThreadingPolicy,
) -> Result<(), String>
where
T: RgbaPackable<T>
+ Default
+ Copy
+ Clone
+ Send
+ Sync
+ 'static
+ MorphNativeOp<T>,
T: RgbaPackable<T> + Default + Copy + Clone + Send + Sync + 'static + MorphNativeOp<T>,
{
let unpacked = T::unpack(src, image_size);
let mut dst_unpacked = UnpackedRgbaImage::alloc(image_size);
Expand Down
2 changes: 1 addition & 1 deletion src/op.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::border_mode::BorderMode;
use crate::morph_gray_alpha::make_morphology_gray_alpha;
use crate::morph_rgb::make_morphology_rgb;
use crate::morph_rgba::make_morphology_rgba;
use crate::op_impl::make_morphology;
use crate::op_type::MorphOp;
use crate::structuring_element::KernelShape;
use crate::{ImageSize, MorphologyThreadingPolicy};
use crate::morph_gray_alpha::make_morphology_gray_alpha;

/// Dilate a gray (planar) image
///
Expand Down
2 changes: 1 addition & 1 deletion src/op_f32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use crate::border_mode::BorderMode;
use crate::morph_gray_alpha::make_morphology_gray_alpha;
use crate::morph_rgb::make_morphology_rgb;
use crate::morph_rgba::make_morphology_rgba;
use crate::op_impl::make_morphology;
use crate::op_type::MorphOp;
use crate::structuring_element::KernelShape;
use crate::{ImageSize, MorphologyThreadingPolicy};
use crate::morph_gray_alpha::make_morphology_gray_alpha;

/// Dilate a gray (planar) image stored in f32
///
Expand Down
2 changes: 1 addition & 1 deletion src/op_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@ use crate::arena::make_arena;
use crate::border_mode::BorderMode;
use crate::filter::MorthFilterFlat2DRow;
use crate::filter_op_declare::MorthOpFilterFlat2DRow;
use crate::morph_base::MorphNativeOp;
use crate::op_type::MorphOp;
use crate::se_scan::scan_se;
use crate::structuring_element::KernelShape;
use crate::unsafe_slice::UnsafeSlice;
use crate::{ImageSize, MorphologyThreadingPolicy};
use std::sync::Arc;
use crate::morph_base::MorphNativeOp;

pub(crate) unsafe fn make_morphology<T, const OP_TYPE: u8>(
src: &[T],
Expand Down
3 changes: 3 additions & 0 deletions src/ops/avx/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// The AVX row filter is implemented in the private `morph_op` submodule.
mod morph_op;

// Re-export the filter type so the rest of the crate can import it as
// `crate::ops::avx::MorphOpFilterAvx2DRow` without naming the submodule.
pub use morph_op::MorphOpFilterAvx2DRow;
Loading

0 comments on commit 841ab4e

Please sign in to comment.