Skip to content

Commit

Permalink
Iterate pixels by row, not column, for better cache efficiency
Browse files: browse the repository at this point in the history
  • Loading branch information
WilliamVenner committed Jul 9, 2022
1 parent d3253d1 commit 0028987
Show file tree
Hide file tree
Showing 10 changed files with 1,835 additions and 1,044 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "squad-mortar-helper"
version = "0.4.1"
version = "0.4.2"
edition = "2021"
authors = ["William Venner <[email protected]>"]
publish = false
Expand Down
59 changes: 11 additions & 48 deletions util/src/image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,78 +8,41 @@ macro_rules! par_iter_pixels {
($image:ident[$x:expr, $y:expr, $w:expr, $h:expr]) => {{
if $x + $w > $image.width() || $y + $h > $image.height() {
panic!(
"iter_pixels ({}, {}) to ({}, {}) is outside of bounds for {}x{} image",
"par_iter_pixels ({}, {}) to ({}, {}) is outside of bounds for {}x{} image",
$x, $y, ($x + $w) - 1, ($y + $h) - 1,
$image.width(),
$image.height()
);
}

let self_ = $crate::UnsafeSendPtr::new_const(&$image);
($x..($x + $w)).into_par_iter().map(move |x| ($y..($y + $h)).into_par_iter().map(move |y| (x, y))).flatten().map(move |(x_, y_)| {
($y..($y + $h)).into_par_iter().flat_map(move |y| ($x..($x + $w)).into_par_iter().map(move |x| {
let self_ = unsafe { self_.as_const() };

#[cfg(debug_assertions)]
let p = self_.get_pixel(x_, y_).clone();
let p = self_.get_pixel(x, y).clone();

#[cfg(not(debug_assertions))]
let p = unsafe { self_.unsafe_get_pixel(x_, y_) };
let p = unsafe { self_.unsafe_get_pixel(x, y) };

(x_, y_, p)
})
(x, y, p)
}))
}};

($image:ident) => {{
let (w, h) = $image.dimensions();
let self_ = $crate::UnsafeSendPtr::new_const(&$image);
(0..w).into_par_iter().map(move |x| (0..h).into_par_iter().map(move |y| (x, y))).flatten().map(move |(x_, y_)| {
(0..h).into_par_iter().flat_map(move |y| (0..w).into_par_iter().map(move |x| {
let self_ = unsafe { self_.as_const() };

#[cfg(debug_assertions)]
let p = self_.get_pixel(x_, y_).clone();
let p = self_.get_pixel(x, y).clone();

#[cfg(not(debug_assertions))]
let p = unsafe { self_.unsafe_get_pixel(x_, y_) };
let p = unsafe { self_.unsafe_get_pixel(x, y) };

(x_, y_, p)
})
}};
}

#[macro_export]
macro_rules! iter_pixels {
($image:ident[$x:expr, $y:expr, $w:expr, $h:expr]) => {{
if $x + $w > $image.width() || $y + $h > $image.height() {
panic!(
"iter_pixels ({}, {}) to ({}, {}) is outside of bounds for {}x{} image",
$x, $y, ($x + $w) - 1, ($y + $h) - 1,
$image.width(),
$image.height()
);
}

($x..($x + $w)).into_iter().map(move |x| ($y..($y + $h)).into_iter().map(move |y| (x, y))).flatten().map(|(x_, y_)| {
#[cfg(debug_assertions)]
let p = *$image.get_pixel(x_, y_);

#[cfg(not(debug_assertions))]
let p = unsafe { $image.unsafe_get_pixel(x_, y_) };

(x_, y_, p)
})
}};

($image:ident) => {{
let (w, h) = $image.dimensions();
(0..w).into_iter().map(move |x| (0..h).into_iter().map(move |y| (x, y))).flatten().map(|(x_, y_)| {
#[cfg(debug_assertions)]
let p = *$image.get_pixel(x_, y_);

#[cfg(not(debug_assertions))]
let p = unsafe { $image.unsafe_get_pixel(x_, y_) };

(x_, y_, p)
})
(x, y, p)
}))
}};
}

Expand Down
8 changes: 4 additions & 4 deletions vision-gpu/cuda/cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -458,9 +458,9 @@ edge:
}
}

for (int32_t xx = x - OCR_PREPROCESS_DILATE_RADIUS; xx <= x + OCR_PREPROCESS_DILATE_RADIUS; xx++)
for (int32_t yy = y - OCR_PREPROCESS_DILATE_RADIUS; yy <= y + OCR_PREPROCESS_DILATE_RADIUS; yy++)
{
for (int32_t yy = y - OCR_PREPROCESS_DILATE_RADIUS; yy <= y + OCR_PREPROCESS_DILATE_RADIUS; yy++)
for (int32_t xx = x - OCR_PREPROCESS_DILATE_RADIUS; xx <= x + OCR_PREPROCESS_DILATE_RADIUS; xx++)
{
if (xx < 0 || xx >= w || yy < 0 || yy >= h)
continue;
Expand Down Expand Up @@ -570,9 +570,9 @@ extern "C" __global__ void filter_map_marker_icons(
const uint32_t xx = template_match.xy % stride;
const uint32_t yy = template_match.xy / stride;

for (uint32_t marker_x = 0; marker_x < marker_size; marker_x++)
for (uint32_t marker_y = 0; marker_y < marker_size; marker_y++)
{
for (uint32_t marker_y = 0; marker_y < marker_size; marker_y++)
for (uint32_t marker_x = 0; marker_x < marker_size; marker_x++)
{
RGBA marker_pixel = marker[marker_y * marker_size + marker_x];
RGB pixel = input[(yy + marker_y) * stride + (xx + marker_x)];
Expand Down
Loading

0 comments on commit 0028987

Please sign in to comment.