Skip to content

Commit

Permalink
Improvements on x86
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Sep 24, 2024
1 parent c09e7e5 commit 54297a9
Show file tree
Hide file tree
Showing 32 changed files with 583 additions and 332 deletions.
5 changes: 5 additions & 0 deletions .cargo/cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[env]
OPENCV_INCLUDE_PATHS= { value = "C:\\vcpkg\\installed\\x64-windows-static-md\\include", relative = false }
OPENCV_LINK_PATHS= { value = "C:\\vcpkg\\installed\\x64-windows-static-md\\lib", relative = false }
OPENCV_LINK_LIBS="opencv_core4.lib,opencv_imgproc4.lib,zlib.lib"
OPENCV_MSVC_CRT="static"
11 changes: 0 additions & 11 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ exclude = ["*.jpg", "*.png"]
[dependencies]
colorutils-rs = "0.5.12"
num-traits = "0.2.19"
rayon = "1.10.0"
rayon = "1.10.0"
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,21 @@ M3 Pro, NEON dilation RGBA image 2731x4096 with specified kernel size
| FM | 48.25ms | 81.12ms | 111.79ms | 274.01ms | 515.54ms |
| OpenCV | 30.22ms | 72.63ms | 138.69ms | 555.51ms | 1.19s |

SSE dilation RGB image 2731x4096 with specified kernel size

| SE | 9x9 | 15x15 | 21x21 | 41x41 | 61x61 |
|--------|---------|----------|----------|----------|--------|
| FM | 84.19ms | 186.53ms | 254.70ms | 673.45ms | 1.37s |
| OpenCV | 28.61ms | 62.43ms | 114.80ms | 428.87ms | 1.16s |

SSE dilation RGBA image 2731x4096 with specified kernel size

| SE | 9x9 | 15x15 | 21x21 | 41x41 | 61x61 |
|--------|----------|----------|----------|----------|--------|
| FM | 109.37ms | 229.11ms | 329.31ms | 981.48ms | 2.05s |
| OpenCV | 39.20ms | 76.09ms | 149.12ms | 569.36ms | 1.33s |


This project is licensed under either of

- BSD-3-Clause License (see [LICENSE](LICENSE.md))
Expand Down
2 changes: 1 addition & 1 deletion app/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ edition = "2021"
image = "0.25.2"
fast_morphology = {path = "../"}
imageproc = "0.25.0"
opencv = {version = "0.93.0", features = ["clang-runtime", "imgproc"]}
opencv = {version = "0.93.0", features = ["imgproc"]}

[dev-dependencies]
criterion = {version = "0.5.1", features = ["html_reports"]}
Expand Down
116 changes: 103 additions & 13 deletions app/benches/dilation/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@

use criterion::{criterion_group, criterion_main, Criterion};
use fast_morphology::{
dilate_rgb, dilate_rgba, BorderMode, ImageSize, KernelShape, MorphologyThreadingPolicy,
dilate, dilate_rgb, dilate_rgba, BorderMode, ImageSize, KernelShape, MorphologyThreadingPolicy,
};
use image::{EncodableLayout, GenericImageView, ImageReader};
use opencv::core::{Mat, MatTrait, Point, Scalar, BORDER_REPLICATE, CV_8U, CV_8UC3, CV_8UC4};
use opencv::core::{
Mat, MatTrait, Point, Scalar, BORDER_REPLICATE, CV_8U, CV_8UC1, CV_8UC3, CV_8UC4,
};
use opencv::imgproc;

pub fn circle_se(radius: usize) -> Vec<u8> {
Expand Down Expand Up @@ -145,6 +147,88 @@ fn exec_bench_rgb(c: &mut Criterion, size: usize) {
);
}

/// Benchmarks single-channel (8-bit luma) dilation with a circular structuring
/// element of radius `size`, once through `fast_morphology::dilate` and once
/// through OpenCV's `imgproc::dilate`.
///
/// The source picture is read from `../assets/fruits.jpg` and converted to luma
/// before any timing starts. Two Criterion benchmarks are registered, labelled
/// `FM, Gray Image dilation: SE NxN` and `OpenCV, Gray Image dilation: SE NxN`,
/// where `N = size * 2 + 1`.
///
/// Panics if the asset cannot be opened or decoded, if an OpenCV matrix cannot
/// be allocated, or if either dilation call returns an error.
fn exec_bench_gray(c: &mut Criterion, size: usize) {
    let decoded = ImageReader::open("../assets/fruits.jpg")
        .unwrap()
        .decode()
        .unwrap();
    let (width, height) = decoded.dimensions();
    let luma_image = decoded.to_luma8();
    let luma_bytes = luma_image.as_bytes();

    // Side length of the square structuring element derived from the radius.
    let se_side = size * 2 + 1;
    let se = circle_se(size);

    // OpenCV needs its own copy of the structuring element as a `Mat`.
    let mut cv_kernel = Mat::new_rows_cols_with_default(
        se_side as i32,
        se_side as i32,
        CV_8U,
        Scalar::new(0., 0., 0., 0.),
    )
    .unwrap();
    // SAFETY: assumes `circle_se(size)` yields at most `se_side * se_side`
    // bytes and that the freshly allocated Mat stores its rows contiguously
    // — TODO confirm against `circle_se` and the opencv crate.
    unsafe {
        let kernel_ptr = cv_kernel.data_mut();
        for (offset, &value) in se.iter().enumerate() {
            kernel_ptr.add(offset).write(value);
        }
    }

    let fm_label = format!("FM, Gray Image dilation: SE {}x{}", se_side, se_side);
    c.bench_function(fm_label.as_str(), |bencher| {
        bencher.iter(|| {
            // The destination buffer is allocated inside the timed closure,
            // so its cost is part of every measured iteration.
            let mut dst_image = vec![0u8; width as usize * height as usize];
            dilate(
                luma_bytes,
                &mut dst_image,
                ImageSize::new(width as usize, height as usize),
                &se,
                KernelShape::new(se_side, se_side),
                BorderMode::default(),
                MorphologyThreadingPolicy::Adaptive,
            )
            .unwrap();
        })
    });

    // Mirror the luma plane into an OpenCV matrix (rows = height, cols = width).
    let mut cv_source = Mat::new_rows_cols_with_default(
        height as i32,
        width as i32,
        CV_8UC1,
        Scalar::new(0., 0., 0., 0.),
    )
    .unwrap();
    // SAFETY: assumes the luma buffer holds exactly `width * height` bytes and
    // that the Mat storage is contiguous — TODO confirm.
    unsafe {
        let source_ptr = cv_source.data_mut();
        for (offset, &value) in luma_bytes.iter().enumerate() {
            source_ptr.add(offset).write(value);
        }
    }

    let cv_label = format!(
        "OpenCV, Gray Image dilation: SE {}x{}",
        se_side, se_side
    );
    c.bench_function(cv_label.as_str(), |bencher| {
        bencher.iter(|| {
            let mut dst_mat = Mat::default();
            imgproc::dilate(
                &cv_source,
                &mut dst_mat,
                &cv_kernel,
                Point::new(-1, -1),
                1,
                BORDER_REPLICATE,
                Scalar::new(0., 0., 0., 0.),
            )
            .unwrap();
        })
    });
}

fn exec_bench_rgba(c: &mut Criterion, size: usize) {
let img = ImageReader::open("../assets/fruits.jpg")
.unwrap()
Expand Down Expand Up @@ -229,20 +313,26 @@ fn exec_bench_rgba(c: &mut Criterion, size: usize) {

/// Criterion entry point: sets OpenCV runtime switches, then runs the dilation
/// benchmarks that are currently enabled.
///
/// Panics (via `expect`) if any of the OpenCV switches cannot be set.
///
/// NOTE(review): this span looks like a rendered diff in which pre-change and
/// post-change lines both appear — confirm against the actual file before
/// relying on the exact statement list below.
pub fn criterion_benchmark(c: &mut Criterion) {
opencv::core::set_use_opencl(false).expect("Failed to disable OpenCL");
// NOTE(review): `set_use_ipp` is called twice in a row with opposite values,
// and the message on the `true` call says "disable"; presumably only the
// `false` call is intended — confirm.
opencv::core::set_use_ipp(true).expect("Failed to disable IPP");
opencv::core::set_use_ipp(false).expect("Failed to disable IPP");
opencv::core::set_use_optimized(false).expect("Failed to disable opts");

// The argument is the structuring-element radius handed to each bench helper.
// NOTE(review): the radius-4 RGB run is followed by both active and
// commented-out copies of the 7/10/20/30 runs — confirm which set is meant
// to be live.
exec_bench_rgb(c, 4);
exec_bench_rgb(c, 7);
exec_bench_rgb(c, 10);
exec_bench_rgb(c, 20);
exec_bench_rgb(c, 30);

exec_bench_rgba(c, 4);
exec_bench_rgba(c, 7);
exec_bench_rgba(c, 10);
exec_bench_rgba(c, 20);
exec_bench_rgba(c, 30);
// exec_bench_rgb(c, 7);
// exec_bench_rgb(c, 10);
// exec_bench_rgb(c, 20);
// exec_bench_rgb(c, 30);
//
// exec_bench_rgba(c, 4);
// exec_bench_rgba(c, 7);
// exec_bench_rgba(c, 10);
// exec_bench_rgba(c, 20);
// exec_bench_rgba(c, 30);

// Only the radius-4 grayscale benchmark is enabled; the larger radii are
// commented out.
exec_bench_gray(c, 4);
// exec_bench_gray(c, 7);
// exec_bench_gray(c, 10);
// exec_bench_gray(c, 20);
// exec_bench_gray(c, 30);
}

criterion_group!(benches, criterion_benchmark);
Expand Down
98 changes: 49 additions & 49 deletions app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,12 @@ fn gaussian_kernel(size: usize, sigma: f32) -> Vec<Vec<f32>> {
}

fn main() {
let radius_size = 5;
let radius_size = 15;
let mut structuring_element = circle_se(radius_size);

// opencv::core::set_use_opencl(false).expect("Failed to disable OpenCL");
// opencv::core::set_use_ipp(true).expect("Failed to disable IPP");
// opencv::core::set_use_optimized(false).expect("Failed to disable opts");
opencv::core::set_use_opencl(false).expect("Failed to disable OpenCL");
opencv::core::set_use_ipp(false).expect("Failed to disable IPP");
opencv::core::set_use_optimized(false).expect("Failed to disable opts");

let se_size = radius_size * 2 + 1;
let full_size = se_size;
Expand All @@ -80,7 +80,7 @@ fn main() {
println!("{:?}", Vec::from(elements));
}

let img = ImageReader::open("./assets/ebelhard.jpg")
let img = ImageReader::open("./assets/fruits.jpg")
.unwrap()
.decode()
.unwrap();
Expand Down Expand Up @@ -180,46 +180,46 @@ fn main() {

println!("rgb exec time {:?}", exec_time.elapsed());

// let mut mat = Mat::new_rows_cols_with_default(
// dimensions.1 as i32,
// dimensions.0 as i32,
// CV_8UC3,
// Scalar::new(0., 0., 0., 0.),
// )
// .unwrap();
// unsafe {
// for (index, &byte) in saved_origin.iter().enumerate() {
// mat.data_mut().add(index).write(byte);
// }
// }
// let mut kernel = Mat::new_rows_cols_with_default(
// full_size as i32,
// full_size as i32,
// CV_8U,
// Scalar::new(0., 0., 0., 0.),
// )
// .unwrap();
// unsafe {
// for (index, &byte) in structuring_element.iter().enumerate() {
// kernel.data_mut().add(index).write(byte);
// }
// }
let mut mat = Mat::new_rows_cols_with_default(
dimensions.1 as i32,
dimensions.0 as i32,
CV_8UC3,
Scalar::new(0., 0., 0., 0.),
)
.unwrap();
unsafe {
for (index, &byte) in saved_origin.iter().enumerate() {
mat.data_mut().add(index).write(byte);
}
}
let mut kernel = Mat::new_rows_cols_with_default(
full_size as i32,
full_size as i32,
CV_8U,
Scalar::new(0., 0., 0., 0.),
)
.unwrap();
unsafe {
for (index, &byte) in structuring_element.iter().enumerate() {
kernel.data_mut().add(index).write(byte);
}
}

let exec_time = Instant::now();

// let mut dst_mat = Mat::default();
// imgproc::dilate(
// &mat,
// &mut dst_mat,
// &kernel,
// Point::new(-1, -1),
// 1,
// BORDER_REPLICATE,
// Scalar::new(0., 0., 0., 0.),
// )
// .unwrap();
let mut dst_mat = Mat::default();
imgproc::dilate(
&mat,
&mut dst_mat,
&kernel,
Point::new(-1, -1),
1,
BORDER_REPLICATE,
Scalar::new(0., 0., 0., 0.),
)
.unwrap();

// let open_cv_bytes = dst_mat.data_bytes().unwrap();
let open_cv_bytes = dst_mat.data_bytes().unwrap();

println!("opencv exec time {:?}", exec_time.elapsed());

Expand All @@ -241,12 +241,12 @@ fn main() {
)
.unwrap();

// image::save_buffer(
// "converted_opencv.png",
// &open_cv_bytes,
// dimensions.0,
// dimensions.1,
// image::ColorType::Rgb8,
// )
// .unwrap();
image::save_buffer(
"converted_opencv.png",
&open_cv_bytes,
dimensions.0,
dimensions.1,
image::ColorType::Rgb8,
)
.unwrap();
}
1 change: 1 addition & 0 deletions src/arena_roi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ unsafe fn copy_row_sse(dst: &mut [u8], src: &[u8], start: usize, stride: usize)
}

/// Copies ROI from one image to another
#[allow(clippy::type_complexity)]
pub fn copy_roi(arena: &mut [u8], roi: &[u8], arena_stride: usize, stride: usize, height: usize) {
let mut dst = arena;
let mut src = roi;
Expand Down
5 changes: 1 addition & 4 deletions src/flat_se.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,7 @@ pub(crate) struct AnalyzedSe {
}

impl AnalyzedSe {
pub fn new(
original_se: Vec<u8>,
left_front: FlatSe,
) -> AnalyzedSe {
pub fn new(original_se: Vec<u8>, left_front: FlatSe) -> AnalyzedSe {
let is_empty =
left_front.element_offsets.is_empty() && left_front.element_offsets.is_empty();
AnalyzedSe {
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
extern crate core;

mod arena;
mod arena_roi;
mod border_mode;
mod filter;
mod filter_op_declare;
Expand All @@ -46,7 +47,6 @@ mod se_scan;
mod structuring_element;
mod thread_policy;
mod unsafe_slice;
mod arena_roi;

pub use border_mode::BorderMode;
pub use img_size::ImageSize;
Expand Down
1 change: 1 addition & 0 deletions src/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ mod morph_rows_rgba_4_op;
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
pub mod neon;
mod op;
mod smart_allocator;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub mod sse;
mod utils;
Expand Down
Loading

0 comments on commit 54297a9

Please sign in to comment.