Skip to content

Commit

Permalink
Bugfixes and improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
awxkee committed Sep 25, 2024
1 parent bbb45c2 commit cc34e1b
Show file tree
Hide file tree
Showing 16 changed files with 950 additions and 40 deletions.
13 changes: 1 addition & 12 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ workspace = { members = ["app"] }

[package]
name = "fast_morphology"
version = "0.1.0"
version = "0.1.1"
edition = "2021"
description = "Fast morphology for images"
description = "Fast morphological operations for images"
readme = "README.md"
keywords = ["morph", "morphology", "dilate", "erode"]
license = "Apache-2.0 OR BSD-3-Clause"
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
# Fast morphology in pure Rust

This crate provides fast 2D morphological operations with arbitrarily shaped structuring elements for planar, RGB and RGBA images.
The library provides high-performance erosion, dilation, closing and opening.

In most cases, when an operation is implemented fully in hardware, its performance is faster than OpenCV's.

If you are not familiar with morphological operations, read the [OpenCV documentation](https://docs.opencv.org/4.x/d9/d61/tutorial_py_morphological_ops.html).

### Example

```rust
dilate_rgb(
&src,
Expand Down
2 changes: 1 addition & 1 deletion app/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ edition = "2021"
image = "0.25.2"
fast_morphology = {path = "../", features = ["image"]}
imageproc = "0.25.0"
opencv = {version = "0.93.0", features = ["imgproc", "clang-runtime"]}
opencv = {version = "0.93.0", features = ["imgproc"]}

[dev-dependencies]
criterion = {version = "0.5.1", features = ["html_reports"]}
Expand Down
14 changes: 10 additions & 4 deletions app/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use fast_morphology::{
dilate, dilate_rgb, dilate_rgba, erode, erode_rgba, morphology_image, BorderMode, ImageSize,
KernelShape, MorphExOp, MorphologyThreadingPolicy,
};
use image::{EncodableLayout, GenericImageView, ImageReader};
use image::{DynamicImage, EncodableLayout, GenericImageView, ImageReader};
use opencv::core::{
Mat, MatTrait, MatTraitConstManual, Point, Scalar, BORDER_REPLICATE, CV_8U, CV_8UC3,
};
Expand Down Expand Up @@ -66,7 +66,7 @@ fn gaussian_kernel(size: usize, sigma: f32) -> Vec<Vec<f32>> {
}

fn main() {
let radius_size = 10;
let radius_size = 35;
let mut structuring_element = circle_se(radius_size);

opencv::core::set_use_opencl(false).expect("Failed to disable OpenCL");
Expand Down Expand Up @@ -223,17 +223,23 @@ fn main() {

println!("opencv exec time {:?}", exec_time.elapsed());

let exec_time = Instant::now();

let new_image = morphology_image(
img,
MorphExOp::Erode,
MorphExOp::Closing,
&structuring_element,
KernelShape::new(se_size, se_size),
BorderMode::default(),
MorphologyThreadingPolicy::default(),
)
.unwrap();

new_image.save("dilated.jpg").unwrap();
println!("morphology_image exec time {:?}", exec_time.elapsed());

let rollback_img = DynamicImage::ImageRgb8(new_image.to_rgb8());

rollback_img.save("dilated.jpg").unwrap();

image::save_buffer(
"converted.png",
Expand Down
59 changes: 59 additions & 0 deletions src/arena_roi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,62 @@ unsafe fn copy_row_sse(dst: &mut [u8], src: &[u8], start: usize, stride: usize)
_cx
}

/// Copies the leading part of a byte row from `src` into `dst` with AVX2/SSE
/// unaligned loads and stores.
///
/// Beginning at byte offset `start`, data is moved in progressively smaller
/// chunks of 128, 64, 32, 16 and finally 8 bytes. A chunk is copied only while
/// it ends strictly before `stride`, so the bytes from the returned offset up
/// to `stride` are NOT copied by this function.
///
/// Returns the offset of the first byte that was not copied.
///
/// # Safety
///
/// The CPU must support AVX2. No bounds checks are performed: `src` and `dst`
/// must both be valid for every offset below `stride` that this routine touches.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn copy_row_avx(dst: &mut [u8], src: &[u8], start: usize, stride: usize) -> usize {
    let src_base = src.as_ptr();
    let dst_base = dst.as_mut_ptr();
    let mut pos = start;

    // 4 x 256-bit lanes per iteration.
    while pos + 128 < stride {
        let from = src_base.add(pos);
        let v0 = _mm256_loadu_si256(from as *const __m256i);
        let v1 = _mm256_loadu_si256(from.add(32) as *const __m256i);
        let v2 = _mm256_loadu_si256(from.add(64) as *const __m256i);
        let v3 = _mm256_loadu_si256(from.add(96) as *const __m256i);
        let to = dst_base.add(pos);
        _mm256_storeu_si256(to as *mut __m256i, v0);
        _mm256_storeu_si256(to.add(32) as *mut __m256i, v1);
        _mm256_storeu_si256(to.add(64) as *mut __m256i, v2);
        _mm256_storeu_si256(to.add(96) as *mut __m256i, v3);
        pos += 128;
    }

    // 2 x 256-bit lanes per iteration.
    while pos + 64 < stride {
        let from = src_base.add(pos);
        let v0 = _mm256_loadu_si256(from as *const __m256i);
        let v1 = _mm256_loadu_si256(from.add(32) as *const __m256i);
        let to = dst_base.add(pos);
        _mm256_storeu_si256(to as *mut __m256i, v0);
        _mm256_storeu_si256(to.add(32) as *mut __m256i, v1);
        pos += 64;
    }

    // A single 256-bit lane per iteration.
    while pos + 32 < stride {
        let v0 = _mm256_loadu_si256(src_base.add(pos) as *const __m256i);
        _mm256_storeu_si256(dst_base.add(pos) as *mut __m256i, v0);
        pos += 32;
    }

    // A single 128-bit lane per iteration.
    while pos + 16 < stride {
        let v0 = _mm_loadu_si128(src_base.add(pos) as *const __m128i);
        _mm_storeu_si128(dst_base.add(pos) as *mut __m128i, v0);
        pos += 16;
    }

    // 8 bytes per iteration: load the low 64 bits into a vector register and
    // write exactly those 8 bytes back out.
    while pos + 8 < stride {
        let low_half = _mm_loadu_si64(src_base.add(pos));
        std::ptr::copy_nonoverlapping(
            &low_half as *const _ as *const u8,
            dst_base.add(pos),
            8,
        );
        pos += 8;
    }

    pos
}

/// Copies ROI from one image to another
#[allow(clippy::type_complexity)]
pub fn copy_roi<T>(arena: &mut [T], roi: &[T], arena_stride: usize, stride: usize, height: usize)
Expand All @@ -129,6 +185,9 @@ where
if std::arch::is_x86_feature_detected!("sse4.1") {
_row_handle = Some(copy_row_sse);
}
if std::arch::is_x86_feature_detected!("avx2") {
_row_handle = Some(copy_row_avx);
}
}
unsafe {
for _ in 0..height {
Expand Down
22 changes: 21 additions & 1 deletion src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use crate::filter_op_declare::{Arena, MorthOpFilterFlat2DRow};
use crate::flat_se::AnalyzedSe;
use crate::op_type::MorphOp;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use crate::ops::avx::MorphOpFilterAvx2DRow;
use crate::ops::avx::{MorphOpFilterAvx2DRow, MorphOpFilterAvx2DRowF32, MorphOpFilterAvx2DRowU16};
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
use crate::ops::neon::{
MorphOpFilterNeon2DRow, MorphOpFilterNeon2DRowF32, MorphOpFilterNeon2DRowU16,
Expand Down Expand Up @@ -136,6 +136,11 @@ impl Row2DFilter<f32> for f32 {
MorphOpFilterSse2DRowF32::<{ MorphOp::Dilate as u8 }>::default(),
);
}
if std::arch::is_x86_feature_detected!("avx2") {
_result = Box::new(
MorphOpFilterAvx2DRowF32::<{ MorphOp::Dilate as u8 }>::default(),
);
}
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
{
Expand All @@ -155,6 +160,11 @@ impl Row2DFilter<f32> for f32 {
MorphOpFilterSse2DRowF32::<{ MorphOp::Erode as u8 }>::default(),
);
}
if std::arch::is_x86_feature_detected!("avx2") {
_result = Box::new(
MorphOpFilterAvx2DRowF32::<{ MorphOp::Erode as u8 }>::default(),
);
}
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
{
Expand Down Expand Up @@ -183,6 +193,11 @@ impl Row2DFilter<u16> for u16 {
MorphOpFilterSse2DRowU16::<{ MorphOp::Dilate as u8 }>::default(),
);
}
if std::arch::is_x86_feature_detected!("avx2") {
_result = Box::new(
MorphOpFilterAvx2DRowU16::<{ MorphOp::Dilate as u8 }>::default(),
);
}
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
{
Expand All @@ -202,6 +217,11 @@ impl Row2DFilter<u16> for u16 {
MorphOpFilterSse2DRowU16::<{ MorphOp::Erode as u8 }>::default(),
);
}
if std::arch::is_x86_feature_detected!("avx2") {
_result = Box::new(
MorphOpFilterAvx2DRowU16::<{ MorphOp::Erode as u8 }>::default(),
);
}
}
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
{
Expand Down
Loading

0 comments on commit cc34e1b

Please sign in to comment.