Skip to content

Commit

Permalink
Rework
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Sep 25, 2024
1 parent 54297a9 commit 2ff92da
Show file tree
Hide file tree
Showing 74 changed files with 2,907 additions and 8,713 deletions.
11 changes: 11 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 11 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
# Fast morphology in pure Rust

This crate provides fast 2D morphology with arbitrarily shaped structuring elements for planar, RGB and RGBA images.
In most cases performance stays close to OpenCV, for some shapes on larger kernels works faster than OpenCV.
For small kernels OpenCV performs faster.

# Usage example
In most cases, when implemented fully in hardware, performance is faster than OpenCV.

```rust
dilate_rgb(
Expand Down Expand Up @@ -41,30 +38,29 @@ M3 Pro, NEON dilation RGB image 2731x4096 with specified kernel size

| SE | 9x9 | 15x15 | 21x21 | 41x41 | 61x61 |
|--------|---------|---------|----------|----------|----------|
| FM | 35.41ms | 68.53ms | 85.31ms | 208.47ms | 377.04ms |
| FM | 16.81ms | 17.99ms | 24.53ms | 69.00ms | 142.76ms |
| OpenCV | 20.65ms | 54.43ms | 107.58ms | 418.66ms | 905.21ms |

M3 Pro, NEON dilation RGBA image 2731x4096 with specified kernel size

| SE | 9x9 | 15x15 | 21x21 | 41x41 | 61x61 |
|--------|---------|---------|----------|----------|----------|
| FM | 48.25ms | 81.12ms | 111.79ms | 274.01ms | 515.54ms |
| FM | 21.35ms | 27.20ms | 36.31ms | 93.81ms | 191.31ms |
| OpenCV | 30.22ms | 72.63ms | 138.69ms | 555.51ms | 1.19s |

SSE dilation RGB image 2731x4096 with specified kernel size

| SE | 9x9 | 15x15 | 21x21 | 41x41 | 61x61 |
|--------|---------|----------|----------|----------|--------|
| FM | 84.19ms | 186.53ms | 254.70ms | 673.45ms | 1.37s |
| OpenCV | 28.61ms | 62.43ms | 114.80ms | 428.87ms | 1.16ms |
| SE | 9x9 | 15x15 | 21x21 | 41x41 | 61x61 |
|--------|---------|----------|----------|----------|-------|
| FM | 84.19ms | 186.53ms | 254.70ms | 673.45ms | 1.37s |
| OpenCV | 28.61ms | 62.43ms | 114.80ms | 428.87ms | 1.16s |

SSE dilation RGBA image 2731x4096 with specified kernel size

| SE | 9x9 | 15x15 | 21x21 | 41x41 | 61x61 |
|--------|----------|----------|----------|----------|--------|
| FM | 109.37ms | 229.11ms | 329.31ms | 981.48ms | 2.05ms |
| OpenCV | 39.20ms | 76.09ms | 149.12ms | 569.36ms | 1.33s |

| SE | 9x9 | 15x15 | 21x21 | 41x41 | 61x61 |
|--------|----------|----------|----------|----------|-------|
| FM | 109.37ms | 229.11ms | 329.31ms | 981.48ms | 2.05s |
| OpenCV | 39.20ms | 76.09ms | 149.12ms | 569.36ms | 1.33s |

This project is licensed under either of

Expand Down
2 changes: 1 addition & 1 deletion app/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ edition = "2021"
image = "0.25.2"
fast_morphology = {path = "../"}
imageproc = "0.25.0"
opencv = {version = "0.93.0", features = ["imgproc"]}
opencv = {version = "0.93.0", features = ["imgproc", "clang-runtime"]}

[dev-dependencies]
criterion = {version = "0.5.1", features = ["html_reports"]}
Expand Down
12 changes: 6 additions & 6 deletions app/benches/dilation/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,17 +316,17 @@ pub fn criterion_benchmark(c: &mut Criterion) {
opencv::core::set_use_ipp(false).expect("Failed to disable IPP");
opencv::core::set_use_optimized(false).expect("Failed to disable opts");

exec_bench_rgb(c, 4);
// exec_bench_rgb(c, 4);
// exec_bench_rgb(c, 7);
// exec_bench_rgb(c, 10);
// exec_bench_rgb(c, 20);
// exec_bench_rgb(c, 30);
//
// exec_bench_rgba(c, 4);
// exec_bench_rgba(c, 7);
// exec_bench_rgba(c, 10);
// exec_bench_rgba(c, 20);
// exec_bench_rgba(c, 30);
exec_bench_rgba(c, 4);
exec_bench_rgba(c, 7);
exec_bench_rgba(c, 10);
exec_bench_rgba(c, 20);
exec_bench_rgba(c, 30);

exec_bench_gray(c, 4);
// exec_bench_gray(c, 7);
Expand Down
8 changes: 4 additions & 4 deletions app/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use fast_morphology::{
dilate, dilate_rgb, dilate_rgba, BorderMode, ImageSize, KernelShape, MorphologyThreadingPolicy,
dilate, dilate_rgb, BorderMode, ImageSize, KernelShape, MorphologyThreadingPolicy,
};
use image::{EncodableLayout, GenericImageView, ImageReader};
use opencv::core::{
Mat, MatTrait, MatTraitConstManual, Point, Scalar, BORDER_CONSTANT, BORDER_ISOLATED,
BORDER_REPLICATE, CV_8U, CV_8UC3, CV_8UC4,
Mat, MatTrait, MatTraitConstManual, Point, Scalar,
BORDER_REPLICATE, CV_8U, CV_8UC3,
};
use opencv::imgproc;
use std::time::Instant;
Expand Down Expand Up @@ -66,7 +66,7 @@ fn gaussian_kernel(size: usize, sigma: f32) -> Vec<Vec<f32>> {
}

fn main() {
let radius_size = 15;
let radius_size = 55;
let mut structuring_element = circle_se(radius_size);

opencv::core::set_use_opencl(false).expect("Failed to disable OpenCL");
Expand Down
38 changes: 8 additions & 30 deletions src/arena.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,17 @@ use crate::border_mode::{reflect_index, reflect_index_101, BorderMode};
use crate::filter_op_declare::Arena;
use crate::structuring_element::KernelShape;

pub const PREFERRED_KERNEL_SIZE_FOR_ARENA: usize = 250;

/// Pads an image with *clamp to border* strategy
pub fn make_arena<const COMPONENTS: usize>(
image: &[u8],
/// Pads an image with the chosen border strategy
pub fn make_arena<T, const COMPONENTS: usize>(
image: &[T],
width: u32,
height: u32,
kernel_size: KernelShape,
border_mode: BorderMode,
) -> Arena {
) -> Arena<T>
where
T: Default + Copy,
{
let (kw, kh) = (kernel_size.width, kernel_size.height);

let pad_w = kw / 2;
Expand All @@ -49,7 +50,7 @@ pub fn make_arena<const COMPONENTS: usize>(
let new_height = height as usize + 2 * pad_h;
let new_width = width as usize + 2 * pad_w;

let mut padded_image = vec![0u8; new_height * new_width * COMPONENTS];
let mut padded_image = vec![T::default(); new_height * new_width * COMPONENTS];

let old_stride = width as usize * COMPONENTS;
let new_stride = new_width * COMPONENTS;
Expand Down Expand Up @@ -189,29 +190,6 @@ pub fn make_arena<const COMPONENTS: usize>(
}
}
}
BorderMode::Constant(constant) => {
for i in 0..pad_h {
for j in 0..pad_w {
unsafe {
let v_dst = i * new_stride + j * COMPONENTS;
for i in 0..COMPONENTS {
*padded_image.get_unchecked_mut(v_dst + i) = constant[i];
}
}
}
}

for i in (height as usize + pad_h)..new_height {
for j in (width as usize + pad_w)..new_width {
unsafe {
let v_dst = i * new_stride + j * COMPONENTS;
for i in 0..COMPONENTS {
*padded_image.get_unchecked_mut(v_dst + i) = constant[i];
}
}
}
}
}
}

Arena::new(padded_image, new_width, new_height, pad_w, pad_h)
Expand Down
69 changes: 45 additions & 24 deletions src/arena_roi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,35 +112,56 @@ unsafe fn copy_row_sse(dst: &mut [u8], src: &[u8], start: usize, stride: usize)

/// Copies ROI from one image to another
#[allow(clippy::type_complexity)]
pub fn copy_roi(arena: &mut [u8], roi: &[u8], arena_stride: usize, stride: usize, height: usize) {
let mut dst = arena;
let mut src = roi;
let mut _row_handle: Option<unsafe fn(&mut [u8], &[u8], usize, usize) -> usize> = None;
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
{
_row_handle = Some(copy_row_neon);
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if std::arch::is_x86_feature_detected!("sse4.1") {
_row_handle = Some(copy_row_sse);
pub fn copy_roi<T>(arena: &mut [T], roi: &[T], arena_stride: usize, stride: usize, height: usize)
where
T: Copy,
{
if std::any::type_name::<T>() == "u8" {
let mut dst: &mut [u8] = unsafe { std::mem::transmute(arena) };
let mut src = unsafe { std::mem::transmute(roi) };
let mut _row_handle: Option<unsafe fn(&mut [u8], &[u8], usize, usize) -> usize> = None;
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
{
_row_handle = Some(copy_row_neon);
}
}
unsafe {
for _ in 0..height {
let mut _cx = 0usize;

if let Some(row_handle) = _row_handle {
_cx = row_handle(dst, src, _cx, stride);
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{
if std::arch::is_x86_feature_detected!("sse4.1") {
_row_handle = Some(copy_row_sse);
}
}
unsafe {
for _ in 0..height {
let mut _cx = 0usize;

if let Some(row_handle) = _row_handle {
_cx = row_handle(dst, src, _cx, stride);
}

while _cx < stride {
*dst.get_unchecked_mut(_cx) = *src.get_unchecked(_cx);
_cx += 1;
while _cx < stride {
*dst.get_unchecked_mut(_cx) = *src.get_unchecked(_cx);
_cx += 1;
}

dst = dst.get_unchecked_mut(arena_stride..);
src = src.get_unchecked(stride..);
}
}
} else {
let mut dst = arena;
let mut src = roi;
unsafe {
for _ in 0..height {
let mut _cx = 0usize;

dst = dst.get_unchecked_mut(arena_stride..);
src = src.get_unchecked(stride..);
while _cx < stride {
*dst.get_unchecked_mut(_cx) = *src.get_unchecked(_cx);
_cx += 1;
}

dst = dst.get_unchecked_mut(arena_stride..);
src = src.get_unchecked(stride..);
}
}
}
}
39 changes: 0 additions & 39 deletions src/border_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
use num_traits::{AsPrimitive, Euclid, FromPrimitive, Signed};
use std::ops::Index;

#[repr(C)]
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Default)]
Expand All @@ -42,8 +41,6 @@ pub enum BorderMode {
Reflect,
/// If filter goes out of bounds image will be replicated with rule `gfedcb|abcdefgh|gfedcba`
Reflect101,
/// If filter goes out of bounds image will be replaced with provided constant values
Constant(OutOfBoundsConstant),
}

#[inline]
Expand Down Expand Up @@ -104,39 +101,3 @@ where
}
i.as_()
}

/// Constant per-channel values used to replace pixels that fall outside of
/// the image bounds.
///
/// Channels can be read either by field or by position through
/// `std::ops::Index`: `0` is `r`, `1` is `g`, `2` is `b` and `3` is `a`.
#[repr(C)]
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub struct OutOfBoundsConstant {
    /// Value of channel `0`.
    pub r: u8,
    /// Value of channel `1`.
    pub g: u8,
    /// Value of channel `2`.
    pub b: u8,
    /// Value of channel `3`.
    pub a: u8,
}

impl OutOfBoundsConstant {
    /// Creates a constant from four explicit channel values.
    pub fn new(r: u8, g: u8, b: u8, a: u8) -> OutOfBoundsConstant {
        OutOfBoundsConstant { r, g, b, a }
    }

    /// Creates a constant where every channel holds the same value `v`.
    pub fn replicate(v: u8) -> OutOfBoundsConstant {
        OutOfBoundsConstant::new(v, v, v, v)
    }
}

impl Index<usize> for OutOfBoundsConstant {
    type Output = u8;

    /// Returns the channel stored at position `index` (`0..=3`).
    ///
    /// # Panics
    ///
    /// Panics when `index` is greater than `3`.
    #[inline(always)]
    fn index(&self, index: usize) -> &Self::Output {
        match index {
            0 => &self.r,
            1 => &self.g,
            2 => &self.b,
            3 => &self.a,
            _ => {
                // Only four channels exist; any other position is a caller bug.
                panic!("Index {} does not exist in [OutOfBoundsConstant]", index)
            }
        }
    }
}
Loading

0 comments on commit 2ff92da

Please sign in to comment.