From ba3413d2461ec2c54054f757e7c9327491df8daf Mon Sep 17 00:00:00 2001
From: Christoffer Hjort
Date: Sat, 5 Feb 2022 23:52:11 +0100
Subject: [PATCH 01/43] temp driver

---
 grid_mask_driver.py | 210 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 210 insertions(+)
 create mode 100644 grid_mask_driver.py

diff --git a/grid_mask_driver.py b/grid_mask_driver.py
new file mode 100644
index 0000000000..b21559089f
--- /dev/null
+++ b/grid_mask_driver.py
@@ -0,0 +1,210 @@
+"""grid_mask_driver.py shows how to use the GridMask preprocessing layer.
+
+Operates on the oxford_flowers102 dataset. In this script the flowers
+are loaded, then passed through the preprocessing layers.
+Finally, they are shown using matplotlib.
+"""
+
+import matplotlib.pyplot as plt
+import tensorflow as tf
+
+from keras_cv.utils import fill_utils
+
+IMG_SHAPE = (2, 224, 224)
+
+img = tf.ones(IMG_SHAPE)
+
+# %%
+ratio = 0.6
+
+img_h, img_w = 224, 224
+img_w = tf.cast(img_w, tf.float32)
+img_h = tf.cast(img_h, tf.float32)
+
+squared_w = tf.square(img_w)
+squared_h = tf.square(img_h)
+mask_hw = tf.math.ceil(tf.sqrt(squared_w + squared_h))
+mask_hw = tf.cast(mask_hw, tf.int32)
+
+d = tf.random.uniform(
+    shape=[],
+    minval=tf.math.minimum(img_h * 0.5, img_w * 0.3),
+    maxval=tf.math.maximum(img_h * 0.5, img_w * 0.3) + 1,
+)
+space = ratio * d
+
+d = tf.cast(d, tf.int32)
+space = tf.cast(space, tf.int32)
+square_l = d - space
+
+delta_x = tf.random.uniform([], minval=0, maxval=space, dtype=tf.int32)
+delta_y = tf.random.uniform([], minval=0, maxval=space, dtype=tf.int32)
+
+gridsize = mask_hw // d + 1
+gridrange = tf.range(1, gridsize)
+d_range = gridrange * d
+x1 = d_range - delta_x
+x0 = x1 - square_l
+y1 = d_range - delta_y
+y0 = y1 - square_l
+
+x0, y0 = tf.meshgrid(x0, y0)
+x1, y1 = tf.meshgrid(x1, y1)
+corners0 = tf.stack([x0, y0], axis=-1)
+corners1 = tf.stack([x1, y1], axis=-1)
+corners0 = tf.reshape(corners0, [-1, 2])
+corners1 = tf.reshape(corners1, [-1, 2])
+corners = tf.concat([corners0, corners1], axis=1)
+
+mask_shape = (tf.shape(corners)[0], mask_hw, mask_hw)
+masks = fill_utils.rectangle_masks(mask_shape, corners)
+mask = tf.reduce_any(masks, axis=0)
+
+# TODO: Rotate mask
+# TODO: Center crop mask
+
+plt.imshow(mask)
+plt.show()
+
+#%%
+ratio = 0.6
+
+batch_size, img_h, img_w = tf.shape(img)
+img_w = tf.cast(img_w, tf.float32)
+img_h = tf.cast(img_h, tf.float32)
+
+squared_w = tf.square(img_w)
+squared_h = tf.square(img_h)
+mask_hw = tf.math.ceil(tf.sqrt(squared_w + squared_h))
+mask_hw = tf.cast(mask_hw, tf.int32)
+mask = tf.zeros((batch_size, mask_hw, mask_hw), dtype=tf.bool)
+
+d = tf.random.uniform(
+    shape=[batch_size],
+    minval=tf.math.minimum(img_h * 0.5, img_w * 0.3),
+    maxval=tf.math.maximum(img_h * 0.5, img_w * 0.3) + 1,
+)
+space = ratio * d
+
+start_xy = tf.random.uniform([batch_size, 2], minval=0, maxval=1, dtype=tf.float32)
+start_xy = start_xy * tf.expand_dims(d, 1)
+
+start_xy = tf.cast(start_xy, tf.int32)
+d = tf.cast(d, tf.int32)
+space = tf.cast(space, tf.int32)
+
+start_xy
+d
+space
+
+#%%
+"""
+mask2.png
+Y
+start 51
+Num blocks 4
+51 101
+135 185
+219 269
+303 353
+X
+start 72
+Num blocks 4
+72 122
+156 206
+240 290
+324 374
+
+mask3.png
+Y
+start 7
+Num blocks 5
+7 55
+87 135
+167 215
+247 295
+327 375
+X
+start 41
+Num blocks 5
+41 89
+121 169
+201 249
+281 329
+361 403
+"""
+ratio = 0.6
+
+batch_size, img_h, img_w = tf.shape(img)
+img_w = tf.cast(img_w, tf.float32)
+img_h = tf.cast(img_h, tf.float32)
+
+mask_hw = 403
+mask = tf.zeros((batch_size, mask_hw, mask_hw),
dtype=tf.bool) + +d = tf.constant([84, 80], tf.float32) +space = ratio * d +space = tf.cast(space, tf.int32) + +start_xy = tf.constant( + [ + [72, 51], # mask2 + [41, 7], # mask3 + ] +) + +start_xy = tf.cast(start_xy, tf.int32) +d = tf.cast(d, tf.int32) + +start_xy +d +space + +start_xy + + +### +gridsize_keep = mask_hw // d +gridsize_mask = gridsize_keep + 1 +gridsize_mask + +#%% +ratio = 0.6 + +batch_size, img_h, img_w = tf.shape(img) +img_w = tf.cast(img_w, tf.float32) +img_h = tf.cast(img_h, tf.float32) + +squared_w = tf.square(img_w) +squared_h = tf.square(img_h) +mask_hw = tf.math.ceil(tf.sqrt(squared_w + squared_h)) +mask_hw = tf.cast(mask_hw, tf.int32) +mask = tf.zeros((batch_size, mask_hw, mask_hw), dtype=tf.bool) + +d = tf.random.uniform( + shape=[batch_size], + minval=tf.math.minimum(img_h * 0.5, img_w * 0.3), + maxval=tf.math.maximum(img_h * 0.5, img_w * 0.3) + 1, +) +space = ratio * d +space = tf.cast(space, tf.int32) + +start_xy = tf.random.uniform([batch_size, 2], minval=0, maxval=1, dtype=tf.float32) +start_xy = start_xy * tf.expand_dims(d, 1) + +start_xy = tf.cast(start_xy, tf.int32) +d = tf.cast(d, tf.int32) + +gridsize_keep = mask_hw // d +gridsize_mask = gridsize_keep + 1 + +d +start_xy + +#%% +# gridmask = preprocessing.GridMask( +# ratio=0.6, gridmask_size_ratio=0.8, rate=0.8 +# ) +# z = gridmask._grid_mask(img, training=True) +# plt.imshow(z) +# plt.show() From a7439465ce0cb46b1e65f0cbfb7a35ef89262165 Mon Sep 17 00:00:00 2001 From: chjort Date: Tue, 8 Feb 2022 00:28:44 +0100 Subject: [PATCH 02/43] vectorize mask computation --- grid_mask_driver.py | 155 +++++++++++++++++++++++++------------------- 1 file changed, 89 insertions(+), 66 deletions(-) diff --git a/grid_mask_driver.py b/grid_mask_driver.py index b21559089f..fdbbab525f 100644 --- a/grid_mask_driver.py +++ b/grid_mask_driver.py @@ -10,14 +10,14 @@ from keras_cv.utils import fill_utils -IMG_SHAPE = (2, 224, 224) +IMG_SHAPE = (3, 224, 224) img = tf.ones(IMG_SHAPE) # %% ratio = 0.6 -img_h, img_w = 224, 224 +batch_size, img_h, img_w = IMG_SHAPE img_w = tf.cast(img_w, tf.float32) img_h = tf.cast(img_h, tf.float32) @@ -27,29 +27,59 @@ mask_hw = tf.cast(mask_hw, tf.int32) d = tf.random.uniform( - shape=[], + shape=[batch_size], minval=tf.math.minimum(img_h * 0.5, img_w * 0.3), maxval=tf.math.maximum(img_h * 0.5, img_w * 0.3) + 1, ) space = ratio * d +delta_x = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) +delta_y = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) +delta_x = delta_x * space +delta_y = delta_y * space + d = tf.cast(d, tf.int32) -space = tf.cast(space, tf.int32) -square_l = d - space -delta_x = tf.random.uniform([], minval=0, maxval=space, dtype=tf.int32) -delta_y = tf.random.uniform([], minval=0, maxval=space, dtype=tf.int32) +gridsize = mask_hw // d +max_gridsize = tf.reduce_max(gridsize) +gridrange = tf.range(1, max_gridsize + 1) +gridrange = tf.tile(tf.expand_dims(gridrange, 0), [batch_size, 1]) + +delta_x = tf.expand_dims(tf.cast(delta_x, tf.int32), 1) +delta_y = tf.expand_dims(tf.cast(delta_y, tf.int32), 1) +d = tf.expand_dims(d, 1) +space = tf.expand_dims(tf.cast(space, tf.int32), 1) +square_l = d - space -gridsize = mask_hw // d + 1 -gridrange = tf.range(1, gridsize) d_range = gridrange * d x1 = d_range - delta_x x0 = x1 - square_l y1 = d_range - delta_y y0 = y1 - square_l -x0, y0 = tf.meshgrid(x0, y0) -x1, y1 = tf.meshgrid(x1, y1) +# mask ranges +d_range_mask = tf.sequence_mask(gridsize, max_gridsize, tf.int32) +x1 = x1 * d_range_mask +x0 = x0 * 
d_range_mask +y1 = y1 * d_range_mask +y0 = y0 * d_range_mask +x1 + +x0 = tf.tile(tf.expand_dims(x0, 1), [1, max_gridsize, 1]) +y0 = tf.tile(tf.expand_dims(y0, 1), [1, max_gridsize, 1]) +y0 = tf.transpose(y0, [0, 2, 1]) + +x1 = tf.tile(tf.expand_dims(x1, 1), [1, max_gridsize, 1]) +y1 = tf.tile(tf.expand_dims(y1, 1), [1, max_gridsize, 1]) +y1 = tf.transpose(y1, [0, 2, 1]) + +x0 = tf.reshape(x0, [-1, max_gridsize]) +y0 = tf.reshape(y0, [-1, max_gridsize]) +x1 = tf.reshape(x1, [-1, max_gridsize]) +y1 = tf.reshape(y1, [-1, max_gridsize]) +x0 +y0 + corners0 = tf.stack([x0, y0], axis=-1) corners1 = tf.stack([x1, y1], axis=-1) corners0 = tf.reshape(corners0, [-1, 2]) @@ -58,18 +88,30 @@ mask_shape = (tf.shape(corners)[0], mask_hw, mask_hw) masks = fill_utils.rectangle_masks(mask_shape, corners) -mask = tf.reduce_any(masks, axis=0) +masks = tf.reshape(masks, [-1, max_gridsize * max_gridsize, mask_hw, mask_hw]) + +hide_mask = tf.reduce_all(corners != 0, axis=1) +hide_mask = tf.reshape(hide_mask, [-1, max_gridsize * max_gridsize]) +masks_ = masks & hide_mask[:, :, tf.newaxis, tf.newaxis] + +mask = tf.reduce_any(masks, axis=1) +mask_ = tf.reduce_any(masks, axis=1) # TODO: Rotate mask # TODO: Center crop mask -plt.imshow(mask) -plt.show() +for m in mask: + plt.imshow(m) + plt.show() -#%% +for m in mask_: + plt.imshow(m) + plt.show() + +# %% ratio = 0.6 -batch_size, img_h, img_w = tf.shape(img) +img_h, img_w = 224, 224 img_w = tf.cast(img_w, tf.float32) img_h = tf.cast(img_h, tf.float32) @@ -77,25 +119,47 @@ squared_h = tf.square(img_h) mask_hw = tf.math.ceil(tf.sqrt(squared_w + squared_h)) mask_hw = tf.cast(mask_hw, tf.int32) -mask = tf.zeros((batch_size, mask_hw, mask_hw), dtype=tf.bool) d = tf.random.uniform( - shape=[batch_size], + shape=[], minval=tf.math.minimum(img_h * 0.5, img_w * 0.3), maxval=tf.math.maximum(img_h * 0.5, img_w * 0.3) + 1, ) space = ratio * d -start_xy = tf.random.uniform([batch_size, 2], minval=0, maxval=1, dtype=tf.float32) -start_xy = start_xy * tf.expand_dims(d, 1) - -start_xy = tf.cast(start_xy, tf.int32) d = tf.cast(d, tf.int32) space = tf.cast(space, tf.int32) +square_l = d - space + +delta_x = tf.random.uniform([], minval=0, maxval=space, dtype=tf.int32) +delta_y = tf.random.uniform([], minval=0, maxval=space, dtype=tf.int32) + +gridsize = mask_hw // d + 1 +gridrange = tf.range(1, gridsize) +d_range = gridrange * d +x1 = d_range - delta_x +x0 = x1 - square_l +y1 = d_range - delta_y +y0 = y1 - square_l + +x0, y0 = tf.meshgrid(x0, y0) +x1, y1 = tf.meshgrid(x1, y1) +corners0 = tf.stack([x0, y0], axis=-1) +corners1 = tf.stack([x1, y1], axis=-1) +corners0 = tf.reshape(corners0, [-1, 2]) +corners1 = tf.reshape(corners1, [-1, 2]) +corners = tf.concat([corners0, corners1], axis=1) + +mask_shape = (tf.shape(corners)[0], mask_hw, mask_hw) +masks = fill_utils.rectangle_masks(mask_shape, corners) +mask = tf.reduce_any(masks, axis=0) + +# TODO: Rotate mask +# TODO: Center crop mask + +plt.imshow(mask) +plt.show() -start_xy -d -space #%% """ @@ -167,44 +231,3 @@ gridsize_keep = mask_hw // d gridsize_mask = gridsize_keep + 1 gridsize_mask - -#%% -ratio = 0.6 - -batch_size, img_h, img_w = tf.shape(img) -img_w = tf.cast(img_w, tf.float32) -img_h = tf.cast(img_h, tf.float32) - -squared_w = tf.square(img_w) -squared_h = tf.square(img_h) -mask_hw = tf.math.ceil(tf.sqrt(squared_w + squared_h)) -mask_hw = tf.cast(mask_hw, tf.int32) -mask = tf.zeros((batch_size, mask_hw, mask_hw), dtype=tf.bool) - -d = tf.random.uniform( - shape=[batch_size], - minval=tf.math.minimum(img_h * 0.5, img_w * 0.3), 
- maxval=tf.math.maximum(img_h * 0.5, img_w * 0.3) + 1, -) -space = ratio * d -space = tf.cast(space, tf.int32) - -start_xy = tf.random.uniform([batch_size, 2], minval=0, maxval=1, dtype=tf.float32) -start_xy = start_xy * tf.expand_dims(d, 1) - -start_xy = tf.cast(start_xy, tf.int32) -d = tf.cast(d, tf.int32) - -gridsize_keep = mask_hw // d -gridsize_mask = gridsize_keep + 1 - -d -start_xy - -#%% -# gridmask = preprocessing.GridMask( -# ratio=0.6, gridmask_size_ratio=0.8, rate=0.8 -# ) -# z = gridmask._grid_mask(img, training=True) -# plt.imshow(z) -# plt.show() From 70587d924ef70883426d4e0fefbf246120353450 Mon Sep 17 00:00:00 2001 From: chjort Date: Tue, 8 Feb 2022 00:29:09 +0100 Subject: [PATCH 03/43] vectorize mask computation --- grid_mask_driver.py | 124 -------------------------------------------- 1 file changed, 124 deletions(-) diff --git a/grid_mask_driver.py b/grid_mask_driver.py index fdbbab525f..ad641c975b 100644 --- a/grid_mask_driver.py +++ b/grid_mask_driver.py @@ -107,127 +107,3 @@ for m in mask_: plt.imshow(m) plt.show() - -# %% -ratio = 0.6 - -img_h, img_w = 224, 224 -img_w = tf.cast(img_w, tf.float32) -img_h = tf.cast(img_h, tf.float32) - -squared_w = tf.square(img_w) -squared_h = tf.square(img_h) -mask_hw = tf.math.ceil(tf.sqrt(squared_w + squared_h)) -mask_hw = tf.cast(mask_hw, tf.int32) - -d = tf.random.uniform( - shape=[], - minval=tf.math.minimum(img_h * 0.5, img_w * 0.3), - maxval=tf.math.maximum(img_h * 0.5, img_w * 0.3) + 1, -) -space = ratio * d - -d = tf.cast(d, tf.int32) -space = tf.cast(space, tf.int32) -square_l = d - space - -delta_x = tf.random.uniform([], minval=0, maxval=space, dtype=tf.int32) -delta_y = tf.random.uniform([], minval=0, maxval=space, dtype=tf.int32) - -gridsize = mask_hw // d + 1 -gridrange = tf.range(1, gridsize) -d_range = gridrange * d -x1 = d_range - delta_x -x0 = x1 - square_l -y1 = d_range - delta_y -y0 = y1 - square_l - -x0, y0 = tf.meshgrid(x0, y0) -x1, y1 = tf.meshgrid(x1, y1) -corners0 = tf.stack([x0, y0], axis=-1) -corners1 = tf.stack([x1, y1], axis=-1) -corners0 = tf.reshape(corners0, [-1, 2]) -corners1 = tf.reshape(corners1, [-1, 2]) -corners = tf.concat([corners0, corners1], axis=1) - -mask_shape = (tf.shape(corners)[0], mask_hw, mask_hw) -masks = fill_utils.rectangle_masks(mask_shape, corners) -mask = tf.reduce_any(masks, axis=0) - -# TODO: Rotate mask -# TODO: Center crop mask - -plt.imshow(mask) -plt.show() - - -#%% -""" -mask2.png -Y -start 51 -Num blocks 4 -51 101 -135 185 -219 269 -303 353 -X -start 72 -Num blocks 4 -72 122 -156 206 -240 290 -324 374 - -mask3.png -Y -start 7 -Num blocks 5 -7 55 -87 135 -167 215 -247 295 -327 375 -X -start 41 -Num blocks 5 -41 89 -121 169 -201 249 -281 329 -361 403 -""" -ratio = 0.6 - -batch_size, img_h, img_w = tf.shape(img) -img_w = tf.cast(img_w, tf.float32) -img_h = tf.cast(img_h, tf.float32) - -mask_hw = 403 -mask = tf.zeros((batch_size, mask_hw, mask_hw), dtype=tf.bool) - -d = tf.constant([84, 80], tf.float32) -space = ratio * d -space = tf.cast(space, tf.int32) - -start_xy = tf.constant( - [ - [72, 51], # mask2 - [41, 7], # mask3 - ] -) - -start_xy = tf.cast(start_xy, tf.int32) -d = tf.cast(d, tf.int32) - -start_xy -d -space - -start_xy - - -### -gridsize_keep = mask_hw // d -gridsize_mask = gridsize_keep + 1 -gridsize_mask From 1fe49cd6fb17ca9964515b1adcc0f109f9604832 Mon Sep 17 00:00:00 2001 From: chjort Date: Wed, 9 Feb 2022 00:59:08 +0100 Subject: [PATCH 04/43] vectorize mask computation --- grid_mask_driver.py | 187 +++++++++++++++++++++++--------------------- 1 file 
changed, 99 insertions(+), 88 deletions(-) diff --git a/grid_mask_driver.py b/grid_mask_driver.py index ad641c975b..7d9c311d99 100644 --- a/grid_mask_driver.py +++ b/grid_mask_driver.py @@ -10,100 +10,111 @@ from keras_cv.utils import fill_utils -IMG_SHAPE = (3, 224, 224) +RATIO = 0.6 + + +def _compute_masks(inputs): + """Computes grid masks for all inputs""" + input_shape = tf.shape(inputs) + batch_size = input_shape[0] + height = tf.cast(input_shape[1], tf.float32) + width = tf.cast(input_shape[2], tf.float32) + + # masks side length + squared_w = tf.square(width) + squared_h = tf.square(height) + mask_hw = tf.math.ceil(tf.sqrt(squared_w + squared_h)) + mask_hw = tf.cast(mask_hw, tf.int32) + + # grid unit sizes + unit_sizes = tf.random.uniform( + shape=[batch_size], + minval=tf.math.minimum(height * 0.5, width * 0.3), + maxval=tf.math.maximum(height * 0.5, width * 0.3) + 1, + ) + rectangle_lengths = tf.cast((1 - RATIO) * unit_sizes, tf.int32) + + # x and y offsets for grid units + delta_x = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) + delta_y = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) + delta_x = tf.cast(delta_x * unit_sizes, tf.int32) + delta_y = tf.cast(delta_y * unit_sizes, tf.int32) + + # number of diagonal units per grid (grid size) + unit_sizes = tf.cast(unit_sizes, tf.int32) + grid_sizes = mask_hw // unit_sizes + 1 + max_grid_size = tf.reduce_max(grid_sizes) + + # diagonal range per image + diag_range = tf.range(1, max_grid_size + 1) + diag_range = tf.tile(tf.expand_dims(diag_range, 0), [batch_size, 1]) + + # add broadcasting axis for diagonal ranges + delta_x = tf.expand_dims(delta_x, 1) + delta_y = tf.expand_dims(delta_y, 1) + unit_sizes = tf.expand_dims(unit_sizes, 1) + rectangle_lengths = tf.expand_dims(rectangle_lengths, 1) + + # diagonal corner coordinates + d_range = diag_range * unit_sizes + x1 = d_range - delta_x + x0 = x1 - rectangle_lengths + y1 = d_range - delta_y + y0 = y1 - rectangle_lengths + + # mask coordinates by grid ranges + d_range_mask = tf.sequence_mask( + lengths=grid_sizes, maxlen=max_grid_size, dtype=tf.int32 + ) + x1 = x1 * d_range_mask + x0 = x0 * d_range_mask + y1 = y1 * d_range_mask + y0 = y0 * d_range_mask + + # expand diagonal top left corner coordinates into a mesh plane + x0 = tf.tile(tf.expand_dims(x0, 1), [1, max_grid_size, 1]) + y0 = tf.tile(tf.expand_dims(y0, 1), [1, max_grid_size, 1]) + y0 = tf.transpose(y0, [0, 2, 1]) + + # expand diagonal bottom right corner coordinates into a mesh plane + x1 = tf.tile(tf.expand_dims(x1, 1), [1, max_grid_size, 1]) + y1 = tf.tile(tf.expand_dims(y1, 1), [1, max_grid_size, 1]) + y1 = tf.transpose(y1, [0, 2, 1]) + + # flatten mesh planes to mesh grids + x0 = tf.reshape(x0, [-1, max_grid_size]) + y0 = tf.reshape(y0, [-1, max_grid_size]) + x1 = tf.reshape(x1, [-1, max_grid_size]) + y1 = tf.reshape(y1, [-1, max_grid_size]) + + # combine coordinates to (x0, y0, x1, y1) with shape (num_rectangles_in_batch, 4) + corners0 = tf.stack([x0, y0], axis=-1) + corners1 = tf.stack([x1, y1], axis=-1) + corners0 = tf.reshape(corners0, [-1, 2]) + corners1 = tf.reshape(corners1, [-1, 2]) + corners = tf.concat([corners0, corners1], axis=1) + + # make mask for each rectangle + mask_shape = (tf.shape(corners)[0], mask_hw, mask_hw) + masks = fill_utils.rectangle_masks(mask_shape, corners) + + # reshape masks into shape (batch_size, rectangles_per_image, mask_width, mask_height) + masks = tf.reshape(masks, [-1, max_grid_size * max_grid_size, mask_hw, mask_hw]) + + # combine 
rectangle masks per image + masks = tf.reduce_any(masks, axis=1) + + return masks -img = tf.ones(IMG_SHAPE) # %% -ratio = 0.6 - -batch_size, img_h, img_w = IMG_SHAPE -img_w = tf.cast(img_w, tf.float32) -img_h = tf.cast(img_h, tf.float32) - -squared_w = tf.square(img_w) -squared_h = tf.square(img_h) -mask_hw = tf.math.ceil(tf.sqrt(squared_w + squared_h)) -mask_hw = tf.cast(mask_hw, tf.int32) - -d = tf.random.uniform( - shape=[batch_size], - minval=tf.math.minimum(img_h * 0.5, img_w * 0.3), - maxval=tf.math.maximum(img_h * 0.5, img_w * 0.3) + 1, -) -space = ratio * d - -delta_x = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) -delta_y = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) -delta_x = delta_x * space -delta_y = delta_y * space - -d = tf.cast(d, tf.int32) - -gridsize = mask_hw // d -max_gridsize = tf.reduce_max(gridsize) -gridrange = tf.range(1, max_gridsize + 1) -gridrange = tf.tile(tf.expand_dims(gridrange, 0), [batch_size, 1]) - -delta_x = tf.expand_dims(tf.cast(delta_x, tf.int32), 1) -delta_y = tf.expand_dims(tf.cast(delta_y, tf.int32), 1) -d = tf.expand_dims(d, 1) -space = tf.expand_dims(tf.cast(space, tf.int32), 1) -square_l = d - space - -d_range = gridrange * d -x1 = d_range - delta_x -x0 = x1 - square_l -y1 = d_range - delta_y -y0 = y1 - square_l - -# mask ranges -d_range_mask = tf.sequence_mask(gridsize, max_gridsize, tf.int32) -x1 = x1 * d_range_mask -x0 = x0 * d_range_mask -y1 = y1 * d_range_mask -y0 = y0 * d_range_mask -x1 - -x0 = tf.tile(tf.expand_dims(x0, 1), [1, max_gridsize, 1]) -y0 = tf.tile(tf.expand_dims(y0, 1), [1, max_gridsize, 1]) -y0 = tf.transpose(y0, [0, 2, 1]) - -x1 = tf.tile(tf.expand_dims(x1, 1), [1, max_gridsize, 1]) -y1 = tf.tile(tf.expand_dims(y1, 1), [1, max_gridsize, 1]) -y1 = tf.transpose(y1, [0, 2, 1]) - -x0 = tf.reshape(x0, [-1, max_gridsize]) -y0 = tf.reshape(y0, [-1, max_gridsize]) -x1 = tf.reshape(x1, [-1, max_gridsize]) -y1 = tf.reshape(y1, [-1, max_gridsize]) -x0 -y0 - -corners0 = tf.stack([x0, y0], axis=-1) -corners1 = tf.stack([x1, y1], axis=-1) -corners0 = tf.reshape(corners0, [-1, 2]) -corners1 = tf.reshape(corners1, [-1, 2]) -corners = tf.concat([corners0, corners1], axis=1) - -mask_shape = (tf.shape(corners)[0], mask_hw, mask_hw) -masks = fill_utils.rectangle_masks(mask_shape, corners) -masks = tf.reshape(masks, [-1, max_gridsize * max_gridsize, mask_hw, mask_hw]) - -hide_mask = tf.reduce_all(corners != 0, axis=1) -hide_mask = tf.reshape(hide_mask, [-1, max_gridsize * max_gridsize]) -masks_ = masks & hide_mask[:, :, tf.newaxis, tf.newaxis] - -mask = tf.reduce_any(masks, axis=1) -mask_ = tf.reduce_any(masks, axis=1) +IMG_SHAPE = (5, 224, 224) +img = tf.ones(IMG_SHAPE) +masks = _compute_masks(img) # TODO: Rotate mask # TODO: Center crop mask -for m in mask: - plt.imshow(m) - plt.show() - -for m in mask_: +for m in masks: plt.imshow(m) plt.show() From e18016c4d05c9ea3625a097c9d98e8e4647a70b3 Mon Sep 17 00:00:00 2001 From: chjort Date: Wed, 9 Feb 2022 01:00:53 +0100 Subject: [PATCH 05/43] comment --- grid_mask_driver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grid_mask_driver.py b/grid_mask_driver.py index 7d9c311d99..e304e4207a 100644 --- a/grid_mask_driver.py +++ b/grid_mask_driver.py @@ -98,7 +98,7 @@ def _compute_masks(inputs): mask_shape = (tf.shape(corners)[0], mask_hw, mask_hw) masks = fill_utils.rectangle_masks(mask_shape, corners) - # reshape masks into shape (batch_size, rectangles_per_image, mask_width, mask_height) + # reshape masks into shape (batch_size, 
rectangles_per_image, mask_height, mask_width) masks = tf.reshape(masks, [-1, max_grid_size * max_grid_size, mask_hw, mask_hw]) # combine rectangle masks per image From 8f0378728ffb9100199822bb0fdb981f5f5f3f00 Mon Sep 17 00:00:00 2001 From: chjort Date: Wed, 9 Feb 2022 01:19:49 +0100 Subject: [PATCH 06/43] random rotation and center cropping --- grid_mask_driver.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/grid_mask_driver.py b/grid_mask_driver.py index e304e4207a..546834d64b 100644 --- a/grid_mask_driver.py +++ b/grid_mask_driver.py @@ -112,8 +112,17 @@ def _compute_masks(inputs): img = tf.ones(IMG_SHAPE) masks = _compute_masks(img) -# TODO: Rotate mask -# TODO: Center crop mask +masks = tf.expand_dims(tf.cast(masks, tf.uint8), -1) + +rotate = tf.keras.layers.RandomRotation( + factor=1.0, fill_mode="constant", fill_value=0.0 +) +masks = rotate(masks) + +center_crop = tf.keras.layers.CenterCrop(224, 224) +masks = center_crop(masks) + +masks = tf.cast(masks, tf.bool) for m in masks: plt.imshow(m) From 6a0e9a61af81c5b788cdd463a4e61bf213544aab Mon Sep 17 00:00:00 2001 From: chjort Date: Fri, 11 Feb 2022 18:32:07 +0100 Subject: [PATCH 07/43] finish vectorized gridmask computation --- grid_mask_driver.py | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/grid_mask_driver.py b/grid_mask_driver.py index 546834d64b..a24fc00b91 100644 --- a/grid_mask_driver.py +++ b/grid_mask_driver.py @@ -11,20 +11,22 @@ from keras_cv.utils import fill_utils RATIO = 0.6 +H_AXIS = -3 +W_AXIS = -2 def _compute_masks(inputs): """Computes grid masks for all inputs""" input_shape = tf.shape(inputs) batch_size = input_shape[0] - height = tf.cast(input_shape[1], tf.float32) - width = tf.cast(input_shape[2], tf.float32) + height = tf.cast(input_shape[H_AXIS], tf.float32) + width = tf.cast(input_shape[W_AXIS], tf.float32) # masks side length squared_w = tf.square(width) squared_h = tf.square(height) - mask_hw = tf.math.ceil(tf.sqrt(squared_w + squared_h)) - mask_hw = tf.cast(mask_hw, tf.int32) + mask_side_length = tf.math.ceil(tf.sqrt(squared_w + squared_h)) + mask_side_length = tf.cast(mask_side_length, tf.int32) # grid unit sizes unit_sizes = tf.random.uniform( @@ -42,7 +44,7 @@ def _compute_masks(inputs): # number of diagonal units per grid (grid size) unit_sizes = tf.cast(unit_sizes, tf.int32) - grid_sizes = mask_hw // unit_sizes + 1 + grid_sizes = mask_side_length // unit_sizes + 1 max_grid_size = tf.reduce_max(grid_sizes) # diagonal range per image @@ -95,11 +97,14 @@ def _compute_masks(inputs): corners = tf.concat([corners0, corners1], axis=1) # make mask for each rectangle - mask_shape = (tf.shape(corners)[0], mask_hw, mask_hw) + mask_shape = (tf.shape(corners)[0], mask_side_length, mask_side_length) masks = fill_utils.rectangle_masks(mask_shape, corners) # reshape masks into shape (batch_size, rectangles_per_image, mask_height, mask_width) - masks = tf.reshape(masks, [-1, max_grid_size * max_grid_size, mask_hw, mask_hw]) + masks = tf.reshape( + masks, + [-1, max_grid_size * max_grid_size, mask_side_length, mask_side_length], + ) # combine rectangle masks per image masks = tf.reduce_any(masks, axis=1) @@ -108,20 +113,30 @@ def _compute_masks(inputs): # %% -IMG_SHAPE = (5, 224, 224) -img = tf.ones(IMG_SHAPE) -masks = _compute_masks(img) +def _center_crop(masks, width, height): + masks_shape = tf.shape(masks) + h_diff = masks_shape[1] - height + w_diff = masks_shape[2] - width -masks = tf.expand_dims(tf.cast(masks, 
tf.uint8), -1) + h_start = tf.cast(h_diff / 2, tf.int32) + w_start = tf.cast(w_diff / 2, tf.int32) + return tf.image.crop_to_bounding_box(masks, h_start, w_start, height, width) + + +inputs = tf.ones((5, 224, 224, 3)) +masks = _compute_masks(inputs) rotate = tf.keras.layers.RandomRotation( factor=1.0, fill_mode="constant", fill_value=0.0 ) -masks = rotate(masks) -center_crop = tf.keras.layers.CenterCrop(224, 224) -masks = center_crop(masks) +masks = tf.expand_dims(tf.cast(masks, tf.uint8), -1) +masks = rotate(masks) +input_shape = tf.shape(inputs) +input_height = input_shape[H_AXIS] +input_width = input_shape[W_AXIS] +masks = _center_crop(masks, input_width, input_height) masks = tf.cast(masks, tf.bool) for m in masks: From 27f20259aa2cc798d1eb26581c67225e0b3103bc Mon Sep 17 00:00:00 2001 From: chjort Date: Fri, 11 Feb 2022 18:40:45 +0100 Subject: [PATCH 08/43] refactor --- grid_mask_driver.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/grid_mask_driver.py b/grid_mask_driver.py index a24fc00b91..b2d6ba955e 100644 --- a/grid_mask_driver.py +++ b/grid_mask_driver.py @@ -15,7 +15,7 @@ W_AXIS = -2 -def _compute_masks(inputs): +def _compute_grid_masks(inputs): """Computes grid masks for all inputs""" input_shape = tf.shape(inputs) batch_size = input_shape[0] @@ -112,7 +112,6 @@ def _compute_masks(inputs): return masks -# %% def _center_crop(masks, width, height): masks_shape = tf.shape(masks) h_diff = masks_shape[1] - height @@ -123,20 +122,27 @@ def _center_crop(masks, width, height): return tf.image.crop_to_bounding_box(masks, h_start, w_start, height, width) +# %% inputs = tf.ones((5, 224, 224, 3)) -masks = _compute_masks(inputs) +masks = _compute_grid_masks(inputs) + +# convert mask to single-channel image +masks = tf.cast(masks, tf.uint8) +masks = tf.expand_dims(masks, axis=-1) +# randomly rotate masks rotate = tf.keras.layers.RandomRotation( factor=1.0, fill_mode="constant", fill_value=0.0 ) - -masks = tf.expand_dims(tf.cast(masks, tf.uint8), -1) masks = rotate(masks) +# center crop masks input_shape = tf.shape(inputs) input_height = input_shape[H_AXIS] input_width = input_shape[W_AXIS] masks = _center_crop(masks, input_width, input_height) + +# convert back to boolean mask masks = tf.cast(masks, tf.bool) for m in masks: From 92a019c44938b7dcbbef9647015129c6df310585 Mon Sep 17 00:00:00 2001 From: chjort Date: Fri, 11 Feb 2022 18:41:37 +0100 Subject: [PATCH 09/43] comment --- grid_mask_driver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grid_mask_driver.py b/grid_mask_driver.py index b2d6ba955e..5c8560116f 100644 --- a/grid_mask_driver.py +++ b/grid_mask_driver.py @@ -16,7 +16,7 @@ def _compute_grid_masks(inputs): - """Computes grid masks for all inputs""" + """Computes grid masks for all input images""" input_shape = tf.shape(inputs) batch_size = input_shape[0] height = tf.cast(input_shape[H_AXIS], tf.float32) From 15968f4805a892f1508c2bfc4ce6b84ca77bfd61 Mon Sep 17 00:00:00 2001 From: chjort Date: Fri, 11 Feb 2022 18:42:02 +0100 Subject: [PATCH 10/43] comment --- grid_mask_driver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grid_mask_driver.py b/grid_mask_driver.py index 5c8560116f..078c2ca0be 100644 --- a/grid_mask_driver.py +++ b/grid_mask_driver.py @@ -16,7 +16,7 @@ def _compute_grid_masks(inputs): - """Computes grid masks for all input images""" + """Computes grid masks""" input_shape = tf.shape(inputs) batch_size = input_shape[0] height = tf.cast(input_shape[H_AXIS], tf.float32) From 
2a24c944a8cea0f452f2fa9a5acf21669642db7e Mon Sep 17 00:00:00 2001 From: chjort Date: Fri, 11 Feb 2022 18:50:38 +0100 Subject: [PATCH 11/43] comments --- grid_mask_driver.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/grid_mask_driver.py b/grid_mask_driver.py index 078c2ca0be..b47dc1d91b 100644 --- a/grid_mask_driver.py +++ b/grid_mask_driver.py @@ -34,7 +34,7 @@ def _compute_grid_masks(inputs): minval=tf.math.minimum(height * 0.5, width * 0.3), maxval=tf.math.maximum(height * 0.5, width * 0.3) + 1, ) - rectangle_lengths = tf.cast((1 - RATIO) * unit_sizes, tf.int32) + rectangle_side_length = tf.cast((1 - RATIO) * unit_sizes, tf.int32) # x and y offsets for grid units delta_x = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) @@ -42,27 +42,27 @@ def _compute_grid_masks(inputs): delta_x = tf.cast(delta_x * unit_sizes, tf.int32) delta_y = tf.cast(delta_y * unit_sizes, tf.int32) - # number of diagonal units per grid (grid size) + # grid size (number of diagonal units per grid) unit_sizes = tf.cast(unit_sizes, tf.int32) grid_sizes = mask_side_length // unit_sizes + 1 max_grid_size = tf.reduce_max(grid_sizes) - # diagonal range per image - diag_range = tf.range(1, max_grid_size + 1) - diag_range = tf.tile(tf.expand_dims(diag_range, 0), [batch_size, 1]) + # grid size range per image + grid_size_range = tf.range(1, max_grid_size + 1) + grid_size_range = tf.tile(tf.expand_dims(grid_size_range, 0), [batch_size, 1]) - # add broadcasting axis for diagonal ranges + # make broadcastable to grid size ranges delta_x = tf.expand_dims(delta_x, 1) delta_y = tf.expand_dims(delta_y, 1) unit_sizes = tf.expand_dims(unit_sizes, 1) - rectangle_lengths = tf.expand_dims(rectangle_lengths, 1) + rectangle_side_length = tf.expand_dims(rectangle_side_length, 1) # diagonal corner coordinates - d_range = diag_range * unit_sizes + d_range = grid_size_range * unit_sizes x1 = d_range - delta_x - x0 = x1 - rectangle_lengths + x0 = x1 - rectangle_side_length y1 = d_range - delta_y - y0 = y1 - rectangle_lengths + y0 = y1 - rectangle_side_length # mask coordinates by grid ranges d_range_mask = tf.sequence_mask( @@ -73,17 +73,17 @@ def _compute_grid_masks(inputs): y1 = y1 * d_range_mask y0 = y0 * d_range_mask - # expand diagonal top left corner coordinates into a mesh plane + # mesh grid of diagonal top left corner coordinates for each image x0 = tf.tile(tf.expand_dims(x0, 1), [1, max_grid_size, 1]) y0 = tf.tile(tf.expand_dims(y0, 1), [1, max_grid_size, 1]) y0 = tf.transpose(y0, [0, 2, 1]) - # expand diagonal bottom right corner coordinates into a mesh plane + # mesh grid of diagonal bottom right corner coordinates for each image x1 = tf.tile(tf.expand_dims(x1, 1), [1, max_grid_size, 1]) y1 = tf.tile(tf.expand_dims(y1, 1), [1, max_grid_size, 1]) y1 = tf.transpose(y1, [0, 2, 1]) - # flatten mesh planes to mesh grids + # flatten mesh grids x0 = tf.reshape(x0, [-1, max_grid_size]) y0 = tf.reshape(y0, [-1, max_grid_size]) x1 = tf.reshape(x1, [-1, max_grid_size]) From fb443c71c2f5d607b2fd96be520e09155a0d5c1f Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Sun, 13 Feb 2022 03:13:09 +0100 Subject: [PATCH 12/43] comments --- grid_mask_driver.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/grid_mask_driver.py b/grid_mask_driver.py index b47dc1d91b..391f373fee 100644 --- a/grid_mask_driver.py +++ b/grid_mask_driver.py @@ -89,7 +89,8 @@ def _compute_grid_masks(inputs): x1 = tf.reshape(x1, [-1, max_grid_size]) y1 = tf.reshape(y1, [-1, 
max_grid_size]) - # combine coordinates to (x0, y0, x1, y1) with shape (num_rectangles_in_batch, 4) + # combine coordinates to (x0, y0, x1, y1) + # with shape (num_rectangles_in_batch, 4) corners0 = tf.stack([x0, y0], axis=-1) corners1 = tf.stack([x1, y1], axis=-1) corners0 = tf.reshape(corners0, [-1, 2]) @@ -100,7 +101,8 @@ def _compute_grid_masks(inputs): mask_shape = (tf.shape(corners)[0], mask_side_length, mask_side_length) masks = fill_utils.rectangle_masks(mask_shape, corners) - # reshape masks into shape (batch_size, rectangles_per_image, mask_height, mask_width) + # reshape masks into shape + # (batch_size, rectangles_per_image, mask_height, mask_width) masks = tf.reshape( masks, [-1, max_grid_size * max_grid_size, mask_side_length, mask_side_length], From abd8048ce4d0e83a26d021dc1d39e39d7e2e5ca7 Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Tue, 15 Feb 2022 18:26:53 +0100 Subject: [PATCH 13/43] merge master into branch --- .github/CONTRIBUTING.md | 1 - keras_cv/layers/preprocessing/__init__.py | 2 + keras_cv/layers/preprocessing/equalization.py | 91 ++++++++++++++++ .../layers/preprocessing/equalization_test.py | 26 +++++ keras_cv/layers/preprocessing/grayscale.py | 92 ++++++++++++++++ .../layers/preprocessing/grayscale_test.py | 103 ++++++++++++++++++ 6 files changed, 314 insertions(+), 1 deletion(-) create mode 100644 keras_cv/layers/preprocessing/equalization.py create mode 100644 keras_cv/layers/preprocessing/equalization_test.py create mode 100644 keras_cv/layers/preprocessing/grayscale.py create mode 100644 keras_cv/layers/preprocessing/grayscale_test.py diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 85fe00707a..311bd4198a 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -21,7 +21,6 @@ To make code changes, you need to fork the repository. You will need to setup a development environment and run the unit tests. This is covered in section "Setup environment". - If your code change involves introducing a new API change, please see our [API Design Guidelines](API_DESIGN.md). diff --git a/keras_cv/layers/preprocessing/__init__.py b/keras_cv/layers/preprocessing/__init__.py index e5539ddd66..d3bc653855 100644 --- a/keras_cv/layers/preprocessing/__init__.py +++ b/keras_cv/layers/preprocessing/__init__.py @@ -13,6 +13,8 @@ # limitations under the License. from keras_cv.layers.preprocessing.cut_mix import CutMix +from keras_cv.layers.preprocessing.equalization import Equalization +from keras_cv.layers.preprocessing.grayscale import Grayscale from keras_cv.layers.preprocessing.grid_mask import GridMask from keras_cv.layers.preprocessing.mix_up import MixUp from keras_cv.layers.preprocessing.random_cutout import RandomCutout diff --git a/keras_cv/layers/preprocessing/equalization.py b/keras_cv/layers/preprocessing/equalization.py new file mode 100644 index 0000000000..acb231b228 --- /dev/null +++ b/keras_cv/layers/preprocessing/equalization.py @@ -0,0 +1,91 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import tensorflow as tf + + +class Equalization(tf.keras.layers.Layer): + """Equalization performs histogram equalization on a channel-wise basis. + + Args: + bins: Integer indicating the number of bins to use in histogram equalization. + Should be in the range [0, 256] + + Usage: + ```python + equalize = Equalization() + + (images, labels), _ = tf.keras.datasets.cifar10.load_data() + # Note that images are an int8 Tensor with values in the range [0, 255] + images = equalize(images) + ``` + + Call arguments: + images: Tensor of pixels in range [0, 255], in RGB format. Can be + of type float or int. Should be in NHWC format. + """ + + def __init__(self, bins=256, **kwargs): + super().__init__(**kwargs) + self.bins = bins + + def equalize_channel(self, image, channel_index): + """equalize_channel performs histogram equalization on a single channel. + + Args: + image: int Tensor with pixels in range [0, 255], RGB format, + with channels last + channel_index: channel to equalize + """ + dtype = image.dtype + image = image[..., channel_index] + # Compute the histogram of the image channel. + histogram = tf.histogram_fixed_width(image, [0, 255], nbins=self.bins) + + # For the purposes of computing the step, filter out the nonzeros. + nonzero = tf.where(tf.not_equal(histogram, 0)) + nonzero_histogram = tf.reshape(tf.gather(histogram, nonzero), [-1]) + step = (tf.reduce_sum(nonzero_histogram) - nonzero_histogram[-1]) // ( + self.bins - 1 + ) + + def build_mapping(histogram, step): + # Compute the cumulative sum, shifting by step // 2 + # and then normalization by step. + lookup_table = (tf.cumsum(histogram) + (step // 2)) // step + # Shift lookup_table, prepending with 0. + lookup_table = tf.concat([[0], lookup_table[:-1]], 0) + # Clip the counts to be in range. This is done + # in the C code for image.point. + return tf.clip_by_value(lookup_table, 0, 255) + + # If step is zero, return the original image. Otherwise, build + # lookup table from the full histogram and step and then index from it. + result = tf.cond( + tf.equal(step, 0), + lambda: image, + lambda: tf.gather(build_mapping(histogram, step), image), + ) + + return tf.cast(result, dtype) + + def call(self, images): + # Assumes RGB for now. Scales each channel independently + # and then stacks the result. + # TODO(lukewood): ideally this would be vectorized. + r = tf.map_fn(lambda x: self.equalize_channel(x, 0), images) + g = tf.map_fn(lambda x: self.equalize_channel(x, 1), images) + b = tf.map_fn(lambda x: self.equalize_channel(x, 2), images) + + images = tf.stack([r, g, b], axis=-1) + return images diff --git a/keras_cv/layers/preprocessing/equalization_test.py b/keras_cv/layers/preprocessing/equalization_test.py new file mode 100644 index 0000000000..d1f612a31a --- /dev/null +++ b/keras_cv/layers/preprocessing/equalization_test.py @@ -0,0 +1,26 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
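Between the layer above and its unit test, a minimal standalone sketch of the lookup-table equalization that `equalize_channel` implements; the name `equalize_channel_sketch`, the `tf.maximum` guard, and the random input are illustrative assumptions rather than part of the patch:

```python
import tensorflow as tf


def equalize_channel_sketch(channel, bins=256):
    # Histogram of integer pixel values over [0, 255].
    histogram = tf.histogram_fixed_width(channel, [0, 255], nbins=bins)
    # Step size derived from the nonzero bins, mirroring the layer above.
    nonzero = tf.boolean_mask(histogram, tf.not_equal(histogram, 0))
    step = (tf.reduce_sum(nonzero) - nonzero[-1]) // (bins - 1)
    # Cumulative-sum lookup table, shifted by half a step and clipped.
    lookup = (tf.cumsum(histogram) + step // 2) // tf.maximum(step, 1)
    lookup = tf.clip_by_value(tf.concat([[0], lookup[:-1]], 0), 0, 255)
    # A zero step means the histogram is degenerate: pass the channel through.
    return tf.where(tf.equal(step, 0), channel, tf.gather(lookup, channel))


# A low-contrast channel concentrated in [0, 64) spreads toward [0, 255].
channel = tf.random.uniform([32, 32], maxval=64, dtype=tf.int32)
equalized = equalize_channel_sketch(channel)
```

Dividing by `tf.maximum(step, 1)` only avoids a zero division in the branch that `tf.where` then discards; the layer above guards the same case with `tf.cond`.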
+import tensorflow as tf
+
+from keras_cv.layers.preprocessing.equalization import Equalization
+
+
+class EqualizationTest(tf.test.TestCase):
+    def test_return_shapes(self):
+        xs = 255 * tf.ones((2, 512, 512, 3), dtype=tf.int32)
+        layer = Equalization()
+        xs = layer(xs)
+
+        self.assertEqual(xs.shape, [2, 512, 512, 3])
+        self.assertAllEqual(xs, 255 * tf.ones((2, 512, 512, 3)))
diff --git a/keras_cv/layers/preprocessing/grayscale.py b/keras_cv/layers/preprocessing/grayscale.py
new file mode 100644
index 0000000000..6195678cac
--- /dev/null
+++ b/keras_cv/layers/preprocessing/grayscale.py
@@ -0,0 +1,92 @@
+# Copyright 2022 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tensorflow as tf
+from tensorflow.keras import backend
+from tensorflow.keras import layers
+
+
+class Grayscale(layers.Layer):
+    """Grayscale is a preprocessing layer that transforms RGB images to grayscale images.
+    Input images should have values in the range of [0, 255].
+
+    Input shape:
+        3D (unbatched) or 4D (batched) tensor with shape:
+        `(..., height, width, channels)`, in `"channels_last"` format
+    Output shape:
+        3D (unbatched) or 4D (batched) tensor with shape:
+        `(..., height, width, channels)`, in `"channels_last"` format
+    Args:
+        output_channels.
+            Number of color channels present in the output image.
+            The output_channels can be 1 or 3. An RGB image with shape
+            (..., height, width, 3) will have the following shapes
+            after the `Grayscale` operation:
+                a. (..., height, width, 1) if output_channels = 1
+                b. (..., height, width, 3) if output_channels = 3.
+
+    Usage:
+    ```python
+    (images, labels), _ = tf.keras.datasets.cifar10.load_data()
+    to_grayscale = keras_cv.layers.preprocessing.Grayscale()
+    augmented_images = to_grayscale(images)
+    ```
+    """
+
+    def __init__(self, output_channels=1, **kwargs):
+        super().__init__(**kwargs)
+        self.output_channels = output_channels
+
+    def _check_input_params(self, output_channels):
+        if output_channels not in [1, 3]:
+            raise ValueError(
+                "Received invalid argument output_channels. "
+                f"output_channels must be either 1 or 3. Got {output_channels}"
+            )
+        self.output_channels = output_channels
+
+    def _rgb_to_grayscale(self, image):
+        grayscale = tf.image.rgb_to_grayscale(image)
+        if self.output_channels == 1:
+            return grayscale
+        elif self.output_channels == 3:
+            return tf.image.grayscale_to_rgb(grayscale)
+        else:
+            raise ValueError("Unsupported value for `output_channels`.")
+
+    def call(self, images, training=None):
+        """call method for the Grayscale layer.
+        Args:
+            images: Tensor representing images of shape
+                [batch_size, width, height, channels] or
+                [width, height, channels] with type float or int.
+                Pixel values should be in the range [0, 255]
+        Returns:
+            images: augmented images, same shape as input.
+ """ + if training is None: + training = backend.learning_phase() + + return tf.cond( + tf.cast(training, tf.bool), + lambda: self._rgb_to_grayscale(images), + lambda: images, + ) + + def get_config(self): + config = { + "output_channels": self.output_channels, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/keras_cv/layers/preprocessing/grayscale_test.py b/keras_cv/layers/preprocessing/grayscale_test.py new file mode 100644 index 0000000000..c7ccf335f7 --- /dev/null +++ b/keras_cv/layers/preprocessing/grayscale_test.py @@ -0,0 +1,103 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import tensorflow as tf + +from keras_cv.layers import preprocessing + + +class GrayscaleTest(tf.test.TestCase): + def test_return_shapes(self): + xs = tf.ones((2, 512, 512, 3)) + + layer = preprocessing.Grayscale( + output_channels=1, + ) + xs1 = layer(xs, training=True) + + layer = preprocessing.Grayscale( + output_channels=3, + ) + xs2 = layer(xs, training=True) + + self.assertEqual(xs1.shape, [2, 512, 512, 1]) + self.assertEqual(xs2.shape, [2, 512, 512, 3]) + + def test_in_tf_function(self): + xs = tf.cast( + tf.stack([2 * tf.ones((100, 100, 3)), tf.ones((100, 100, 3))], axis=0), + tf.float32, + ) + + # test 1 + layer = preprocessing.Grayscale( + output_channels=1, + ) + + @tf.function + def augment(x): + return layer(x, training=True) + + xs1 = augment(xs) + + # test 2 + layer = preprocessing.Grayscale( + output_channels=3, + ) + + @tf.function + def augment(x): + return layer(x, training=True) + + xs2 = augment(xs) + + self.assertEqual(xs1.shape, [2, 100, 100, 1]) + self.assertEqual(xs2.shape, [2, 100, 100, 3]) + + def test_non_square_image(self): + xs = tf.cast( + tf.stack([2 * tf.ones((512, 1024, 3)), tf.ones((512, 1024, 3))], axis=0), + tf.float32, + ) + + layer = preprocessing.Grayscale( + output_channels=1, + ) + xs1 = layer(xs, training=True) + + layer = preprocessing.Grayscale( + output_channels=3, + ) + xs2 = layer(xs, training=True) + + self.assertEqual(xs1.shape, [2, 512, 1024, 1]) + self.assertEqual(xs2.shape, [2, 512, 1024, 3]) + + def test_in_single_image(self): + xs = tf.cast( + tf.ones((512, 512, 3)), + dtype=tf.float32, + ) + + layer = preprocessing.Grayscale( + output_channels=1, + ) + xs1 = layer(xs, training=True) + + layer = preprocessing.Grayscale( + output_channels=3, + ) + xs2 = layer(xs, training=True) + + self.assertEqual(xs1.shape, [512, 512, 1]) + self.assertEqual(xs2.shape, [512, 512, 3]) From 9599c15e088df2a89847c7f8f95db8b6e464bff6 Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Tue, 15 Feb 2022 19:29:11 +0100 Subject: [PATCH 14/43] initial vectorized layer --- .../layers/preprocessing/grid_mask_demo.py | 2 +- grid_mask_driver.py | 300 ++++++++++-------- keras_cv/layers/preprocessing/grid_mask.py | 247 ++++++++------ 3 files changed, 326 insertions(+), 223 deletions(-) diff --git a/examples/layers/preprocessing/grid_mask_demo.py 
b/examples/layers/preprocessing/grid_mask_demo.py index 55691a2653..4b54470536 100644 --- a/examples/layers/preprocessing/grid_mask_demo.py +++ b/examples/layers/preprocessing/grid_mask_demo.py @@ -37,7 +37,7 @@ def main(): ratio="random", rotation_factor=0.5, fill_mode="gaussian_noise" ) train_ds = train_ds.map( - lambda x, y: (gridmask(x, training=True), y), + lambda x, y: (gridmask(x), y), num_parallel_calls=tf.data.AUTOTUNE, ) diff --git a/grid_mask_driver.py b/grid_mask_driver.py index 391f373fee..069cf2b856 100644 --- a/grid_mask_driver.py +++ b/grid_mask_driver.py @@ -7,146 +7,192 @@ import matplotlib.pyplot as plt import tensorflow as tf +import tensorflow_datasets as tfds from keras_cv.utils import fill_utils +from keras_cv.layers import preprocessing RATIO = 0.6 H_AXIS = -3 W_AXIS = -2 -def _compute_grid_masks(inputs): - """Computes grid masks""" - input_shape = tf.shape(inputs) - batch_size = input_shape[0] - height = tf.cast(input_shape[H_AXIS], tf.float32) - width = tf.cast(input_shape[W_AXIS], tf.float32) - - # masks side length - squared_w = tf.square(width) - squared_h = tf.square(height) - mask_side_length = tf.math.ceil(tf.sqrt(squared_w + squared_h)) - mask_side_length = tf.cast(mask_side_length, tf.int32) - - # grid unit sizes - unit_sizes = tf.random.uniform( - shape=[batch_size], - minval=tf.math.minimum(height * 0.5, width * 0.3), - maxval=tf.math.maximum(height * 0.5, width * 0.3) + 1, - ) - rectangle_side_length = tf.cast((1 - RATIO) * unit_sizes, tf.int32) - - # x and y offsets for grid units - delta_x = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) - delta_y = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) - delta_x = tf.cast(delta_x * unit_sizes, tf.int32) - delta_y = tf.cast(delta_y * unit_sizes, tf.int32) - - # grid size (number of diagonal units per grid) - unit_sizes = tf.cast(unit_sizes, tf.int32) - grid_sizes = mask_side_length // unit_sizes + 1 - max_grid_size = tf.reduce_max(grid_sizes) - - # grid size range per image - grid_size_range = tf.range(1, max_grid_size + 1) - grid_size_range = tf.tile(tf.expand_dims(grid_size_range, 0), [batch_size, 1]) - - # make broadcastable to grid size ranges - delta_x = tf.expand_dims(delta_x, 1) - delta_y = tf.expand_dims(delta_y, 1) - unit_sizes = tf.expand_dims(unit_sizes, 1) - rectangle_side_length = tf.expand_dims(rectangle_side_length, 1) - - # diagonal corner coordinates - d_range = grid_size_range * unit_sizes - x1 = d_range - delta_x - x0 = x1 - rectangle_side_length - y1 = d_range - delta_y - y0 = y1 - rectangle_side_length - - # mask coordinates by grid ranges - d_range_mask = tf.sequence_mask( - lengths=grid_sizes, maxlen=max_grid_size, dtype=tf.int32 - ) - x1 = x1 * d_range_mask - x0 = x0 * d_range_mask - y1 = y1 * d_range_mask - y0 = y0 * d_range_mask - - # mesh grid of diagonal top left corner coordinates for each image - x0 = tf.tile(tf.expand_dims(x0, 1), [1, max_grid_size, 1]) - y0 = tf.tile(tf.expand_dims(y0, 1), [1, max_grid_size, 1]) - y0 = tf.transpose(y0, [0, 2, 1]) - - # mesh grid of diagonal bottom right corner coordinates for each image - x1 = tf.tile(tf.expand_dims(x1, 1), [1, max_grid_size, 1]) - y1 = tf.tile(tf.expand_dims(y1, 1), [1, max_grid_size, 1]) - y1 = tf.transpose(y1, [0, 2, 1]) - - # flatten mesh grids - x0 = tf.reshape(x0, [-1, max_grid_size]) - y0 = tf.reshape(y0, [-1, max_grid_size]) - x1 = tf.reshape(x1, [-1, max_grid_size]) - y1 = tf.reshape(y1, [-1, max_grid_size]) - - # combine coordinates to (x0, y0, x1, y1) - # with shape 
(num_rectangles_in_batch, 4) - corners0 = tf.stack([x0, y0], axis=-1) - corners1 = tf.stack([x1, y1], axis=-1) - corners0 = tf.reshape(corners0, [-1, 2]) - corners1 = tf.reshape(corners1, [-1, 2]) - corners = tf.concat([corners0, corners1], axis=1) - - # make mask for each rectangle - mask_shape = (tf.shape(corners)[0], mask_side_length, mask_side_length) - masks = fill_utils.rectangle_masks(mask_shape, corners) - - # reshape masks into shape - # (batch_size, rectangles_per_image, mask_height, mask_width) - masks = tf.reshape( - masks, - [-1, max_grid_size * max_grid_size, mask_side_length, mask_side_length], - ) - - # combine rectangle masks per image - masks = tf.reduce_any(masks, axis=1) - - return masks - - -def _center_crop(masks, width, height): - masks_shape = tf.shape(masks) - h_diff = masks_shape[1] - height - w_diff = masks_shape[2] - width - - h_start = tf.cast(h_diff / 2, tf.int32) - w_start = tf.cast(w_diff / 2, tf.int32) - return tf.image.crop_to_bounding_box(masks, h_start, w_start, height, width) +# def _compute_grid_masks(inputs): +# """Computes grid masks""" +# input_shape = tf.shape(inputs) +# batch_size = input_shape[0] +# height = tf.cast(input_shape[H_AXIS], tf.float32) +# width = tf.cast(input_shape[W_AXIS], tf.float32) +# +# # masks side length +# squared_w = tf.square(width) +# squared_h = tf.square(height) +# mask_side_length = tf.math.ceil(tf.sqrt(squared_w + squared_h)) +# mask_side_length = tf.cast(mask_side_length, tf.int32) +# +# # grid unit sizes +# unit_sizes = tf.random.uniform( +# shape=[batch_size], +# minval=tf.math.minimum(height * 0.5, width * 0.3), +# maxval=tf.math.maximum(height * 0.5, width * 0.3) + 1, +# ) +# rectangle_side_length = tf.cast((1 - RATIO) * unit_sizes, tf.int32) +# +# # x and y offsets for grid units +# delta_x = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) +# delta_y = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) +# delta_x = tf.cast(delta_x * unit_sizes, tf.int32) +# delta_y = tf.cast(delta_y * unit_sizes, tf.int32) +# +# # grid size (number of diagonal units per grid) +# unit_sizes = tf.cast(unit_sizes, tf.int32) +# grid_sizes = mask_side_length // unit_sizes + 1 +# max_grid_size = tf.reduce_max(grid_sizes) +# +# # grid size range per image +# grid_size_range = tf.range(1, max_grid_size + 1) +# grid_size_range = tf.tile(tf.expand_dims(grid_size_range, 0), [batch_size, 1]) +# +# # make broadcastable to grid size ranges +# delta_x = tf.expand_dims(delta_x, 1) +# delta_y = tf.expand_dims(delta_y, 1) +# unit_sizes = tf.expand_dims(unit_sizes, 1) +# rectangle_side_length = tf.expand_dims(rectangle_side_length, 1) +# +# # diagonal corner coordinates +# d_range = grid_size_range * unit_sizes +# x1 = d_range - delta_x +# x0 = x1 - rectangle_side_length +# y1 = d_range - delta_y +# y0 = y1 - rectangle_side_length +# +# # mask coordinates by grid ranges +# d_range_mask = tf.sequence_mask( +# lengths=grid_sizes, maxlen=max_grid_size, dtype=tf.int32 +# ) +# x1 = x1 * d_range_mask +# x0 = x0 * d_range_mask +# y1 = y1 * d_range_mask +# y0 = y0 * d_range_mask +# +# # mesh grid of diagonal top left corner coordinates for each image +# x0 = tf.tile(tf.expand_dims(x0, 1), [1, max_grid_size, 1]) +# y0 = tf.tile(tf.expand_dims(y0, 1), [1, max_grid_size, 1]) +# y0 = tf.transpose(y0, [0, 2, 1]) +# +# # mesh grid of diagonal bottom right corner coordinates for each image +# x1 = tf.tile(tf.expand_dims(x1, 1), [1, max_grid_size, 1]) +# y1 = tf.tile(tf.expand_dims(y1, 1), [1, max_grid_size, 1]) +# y1 = 
tf.transpose(y1, [0, 2, 1]) +# +# # flatten mesh grids +# x0 = tf.reshape(x0, [-1, max_grid_size]) +# y0 = tf.reshape(y0, [-1, max_grid_size]) +# x1 = tf.reshape(x1, [-1, max_grid_size]) +# y1 = tf.reshape(y1, [-1, max_grid_size]) +# +# # combine coordinates to (x0, y0, x1, y1) +# # with shape (num_rectangles_in_batch, 4) +# corners0 = tf.stack([x0, y0], axis=-1) +# corners1 = tf.stack([x1, y1], axis=-1) +# corners0 = tf.reshape(corners0, [-1, 2]) +# corners1 = tf.reshape(corners1, [-1, 2]) +# corners = tf.concat([corners0, corners1], axis=1) +# +# # make mask for each rectangle +# masks = fill_utils.rectangle_masks(corners, (mask_side_length, mask_side_length)) +# +# # reshape masks into shape +# # (batch_size, rectangles_per_image, mask_height, mask_width) +# masks = tf.reshape( +# masks, +# [-1, max_grid_size * max_grid_size, mask_side_length, mask_side_length], +# ) +# +# # combine rectangle masks per image +# masks = tf.reduce_any(masks, axis=1) +# +# return masks +# +# +# def _center_crop(masks, width, height): +# masks_shape = tf.shape(masks) +# h_diff = masks_shape[1] - height +# w_diff = masks_shape[2] - width +# +# h_start = tf.cast(h_diff / 2, tf.int32) +# w_start = tf.cast(w_diff / 2, tf.int32) +# return tf.image.crop_to_bounding_box(masks, h_start, w_start, height, width) # %% -inputs = tf.ones((5, 224, 224, 3)) -masks = _compute_grid_masks(inputs) - -# convert mask to single-channel image -masks = tf.cast(masks, tf.uint8) -masks = tf.expand_dims(masks, axis=-1) +IMG_SIZE = (224, 224) +BATCH_SIZE = 64 + +# inputs = tf.ones((BATCH_SIZE, *IMG_SIZE, 3)) +# masks = _compute_grid_masks(inputs) +# +# # convert mask to single-channel image +# masks = tf.cast(masks, tf.uint8) +# masks = tf.expand_dims(masks, axis=-1) +# +# # randomly rotate masks +# rotate = tf.keras.layers.RandomRotation( +# factor=1.0, fill_mode="constant", fill_value=0.0 +# ) +# masks = rotate(masks) +# +# # center crop masks +# input_shape = tf.shape(inputs) +# input_height = input_shape[H_AXIS] +# input_width = input_shape[W_AXIS] +# masks = _center_crop(masks, input_width, input_height) +# +# # convert back to boolean mask +# masks = tf.cast(masks, tf.bool) + +#%% +def resize(image, label, num_classes=10): + image = tf.image.resize(image, IMG_SIZE) + label = tf.one_hot(label, num_classes) + return image, label + + +data, ds_info = tfds.load("oxford_flowers102", with_info=True, as_supervised=True) +train_ds = data["train"] + +num_classes = ds_info.features["label"].num_classes + +train_ds = ( + train_ds.map(lambda x, y: resize(x, y, num_classes=num_classes)) + .shuffle(10 * BATCH_SIZE) + .batch(BATCH_SIZE) +) -# randomly rotate masks -rotate = tf.keras.layers.RandomRotation( - factor=1.0, fill_mode="constant", fill_value=0.0 +gridmask = preprocessing.GridMask( + ratio=0.6, rotation_factor=0.5, fill_mode="gaussian_noise" +) +train_ds = train_ds.map( + lambda x, y: (gridmask(x, training=True), y), + num_parallel_calls=tf.data.AUTOTUNE, ) -masks = rotate(masks) -# center crop masks -input_shape = tf.shape(inputs) -input_height = input_shape[H_AXIS] -input_width = input_shape[W_AXIS] -masks = _center_crop(masks, input_width, input_height) +for images, labels in train_ds.take(1): + plt.figure(figsize=(8, 8)) + for i in range(9): + plt.subplot(3, 3, i + 1) + plt.imshow(images[i].numpy().astype("uint8")) + plt.axis("off") + plt.show() -# convert back to boolean mask -masks = tf.cast(masks, tf.bool) +# x, y = next(iter(train_ds)) -for m in masks: - plt.imshow(m) - plt.show() +#%% +# images = gridmask(x) + +# 
plt.figure(figsize=(8, 8)) +# for i in range(9): +# plt.subplot(3, 3, i + 1) +# plt.imshow(images[i].numpy().astype("uint8")) +# plt.axis("off") +# plt.show() diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index 91662a4190..de6b829cf9 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -17,6 +17,8 @@ from tensorflow.keras import layers from tensorflow.python.keras.utils import layer_utils +from keras_cv.utils import fill_utils + class GridMask(layers.Layer): """GridMask class for grid-mask augmentation. @@ -85,7 +87,9 @@ def __init__( self.ratio = ratio.lower() self.fill_mode = fill_mode self.fill_value = fill_value - self.random_rotate = layers.RandomRotation(factor=rotation_factor, seed=seed) + self.random_rotate = layers.RandomRotation( + factor=rotation_factor, fill_mode="constant", fill_value=0.0, seed=seed + ) self.seed = seed self._check_parameter_values() @@ -112,118 +116,170 @@ def _check_parameter_values(self): layer_utils.validate_string_arg( fill_mode, - allowable_strings=["constant", "gaussian_noise", "random"], + allowable_strings=["constant", "gaussian_noise"], layer_name="GridMask", arg_name="fill_mode", allow_none=False, allow_callables=False, ) - @staticmethod - def _crop(mask, image_height, image_width): - """crops in middle of mask and image corners.""" - mask_width = mask_height = tf.shape(mask)[0] - mask = mask[ - (mask_height - image_height) // 2 : (mask_height - image_height) // 2 - + image_height, - (mask_width - image_width) // 2 : (mask_width - image_width) // 2 - + image_width, - ] - return mask - - @tf.function - def _compute_mask(self, image_height, image_width): - """mask helper function for initializing grid mask of required size.""" - image_height = tf.cast(image_height, dtype=tf.float32) - image_width = tf.cast(image_width, dtype=tf.float32) - - mask_width = mask_height = tf.cast( - tf.math.maximum(image_height, image_width) * 2.0, dtype=tf.int32 + def _compute_grid_masks(self, inputs): + """Computes grid masks""" + input_shape = tf.shape(inputs) + batch_size = input_shape[0] + height = tf.cast(input_shape[1], tf.float32) + width = tf.cast(input_shape[2], tf.float32) + + # masks side length + squared_w = tf.square(width) + squared_h = tf.square(height) + mask_side_length = tf.math.ceil(tf.sqrt(squared_w + squared_h)) + mask_side_length = tf.cast(mask_side_length, tf.int32) + + # grid unit sizes + unit_sizes = tf.random.uniform( + shape=[batch_size], + minval=tf.math.minimum(height * 0.5, width * 0.3), + maxval=tf.math.maximum(height * 0.5, width * 0.3) + 1, ) - - if self.fill_mode == "constant": - mask = tf.fill([mask_height, mask_width], value=-1) - elif self.fill_mode == "gaussian_noise": - mask = tf.cast(tf.random.normal([mask_height, mask_width]), dtype=tf.int32) - else: - raise ValueError( - "Unsupported fill_mode. `fill_mode` should be 'constant' or " - "'gaussian_noise'." 
- ) - - gridblock = tf.random.uniform( - shape=[], - minval=int(tf.math.minimum(image_height * 0.5, image_width * 0.3)), - maxval=int(tf.math.maximum(image_height * 0.5, image_width * 0.3)) + 1, - dtype=tf.int32, - seed=self.seed, - ) - if self.ratio == "random": - length = tf.random.uniform( - shape=[], minval=1, maxval=gridblock + 1, dtype=tf.int32, seed=self.seed + ratio = tf.random.uniform( + shape=[], minval=0, maxval=1, dtype=tf.float32, seed=self.seed ) else: - length = tf.cast( - tf.math.minimum( - tf.math.maximum( - int(tf.cast(gridblock, tf.float32) * self.ratio + 0.5), 1 - ), - gridblock - 1, - ), - tf.int32, - ) + ratio = self.ratio + rectangle_side_length = tf.cast((1 - ratio) * unit_sizes, tf.int32) + + # x and y offsets for grid units + delta_x = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) + delta_y = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) + delta_x = tf.cast(delta_x * unit_sizes, tf.int32) + delta_y = tf.cast(delta_y * unit_sizes, tf.int32) + + # grid size (number of diagonal units per grid) + unit_sizes = tf.cast(unit_sizes, tf.int32) + grid_sizes = mask_side_length // unit_sizes + 1 + max_grid_size = tf.reduce_max(grid_sizes) + + # grid size range per image + grid_size_range = tf.range(1, max_grid_size + 1) + grid_size_range = tf.tile(tf.expand_dims(grid_size_range, 0), [batch_size, 1]) + + # make broadcastable to grid size ranges + delta_x = tf.expand_dims(delta_x, 1) + delta_y = tf.expand_dims(delta_y, 1) + unit_sizes = tf.expand_dims(unit_sizes, 1) + rectangle_side_length = tf.expand_dims(rectangle_side_length, 1) + + # diagonal corner coordinates + d_range = grid_size_range * unit_sizes + x1 = d_range - delta_x + x0 = x1 - rectangle_side_length + y1 = d_range - delta_y + y0 = y1 - rectangle_side_length + + # mask coordinates by grid ranges + d_range_mask = tf.sequence_mask( + lengths=grid_sizes, maxlen=max_grid_size, dtype=tf.int32 + ) + x1 = x1 * d_range_mask + x0 = x0 * d_range_mask + y1 = y1 * d_range_mask + y0 = y0 * d_range_mask + + # mesh grid of diagonal top left corner coordinates for each image + x0 = tf.tile(tf.expand_dims(x0, 1), [1, max_grid_size, 1]) + y0 = tf.tile(tf.expand_dims(y0, 1), [1, max_grid_size, 1]) + y0 = tf.transpose(y0, [0, 2, 1]) + + # mesh grid of diagonal bottom right corner coordinates for each image + x1 = tf.tile(tf.expand_dims(x1, 1), [1, max_grid_size, 1]) + y1 = tf.tile(tf.expand_dims(y1, 1), [1, max_grid_size, 1]) + y1 = tf.transpose(y1, [0, 2, 1]) + + # flatten mesh grids + x0 = tf.reshape(x0, [-1, max_grid_size]) + y0 = tf.reshape(y0, [-1, max_grid_size]) + x1 = tf.reshape(x1, [-1, max_grid_size]) + y1 = tf.reshape(y1, [-1, max_grid_size]) + + # combine coordinates to (x0, y0, x1, y1) + # with shape (num_rectangles_in_batch, 4) + corners0 = tf.stack([x0, y0], axis=-1) + corners1 = tf.stack([x1, y1], axis=-1) + corners0 = tf.reshape(corners0, [-1, 2]) + corners1 = tf.reshape(corners1, [-1, 2]) + corners = tf.concat([corners0, corners1], axis=1) + + # make mask for each rectangle + masks = fill_utils.rectangle_masks( + corners, (mask_side_length, mask_side_length) + ) - for _ in range(2): - start_x = tf.random.uniform( - shape=[], minval=0, maxval=gridblock + 1, dtype=tf.int32, seed=self.seed - ) + # reshape masks into shape + # (batch_size, rectangles_per_image, mask_height, mask_width) + masks = tf.reshape( + masks, + [-1, max_grid_size * max_grid_size, mask_side_length, mask_side_length], + ) - for i in range(mask_width // gridblock): - start = gridblock * i + start_x - 
end = tf.math.minimum(start + length, mask_width) - indices = tf.reshape(tf.range(start, end), [end - start, 1]) - updates = tf.fill([end - start, mask_width], value=self.fill_value) - mask = tf.tensor_scatter_nd_update(mask, indices, updates) - mask = tf.transpose(mask) + # combine rectangle masks per image + masks = tf.reduce_any(masks, axis=1) - return tf.equal(mask, self.fill_value) + return masks - @tf.function - def _grid_mask(self, image): - image_height = tf.shape(image)[0] - image_width = tf.shape(image)[1] + def _center_crop(self, masks, width, height): + masks_shape = tf.shape(masks) + h_diff = masks_shape[1] - height + w_diff = masks_shape[2] - width - grid = self._compute_mask(image_height, image_width) - grid = self.random_rotate(tf.cast(grid[:, :, tf.newaxis], tf.float32)) + h_start = tf.cast(h_diff / 2, tf.int32) + w_start = tf.cast(w_diff / 2, tf.int32) + return tf.image.crop_to_bounding_box(masks, h_start, w_start, height, width) - mask = tf.reshape( - tf.cast(self._crop(grid, image_height, image_width), dtype=image.dtype), - (image_height, image_width), - ) - mask = tf.expand_dims(mask, -1) if image._rank() != mask._rank() else mask + def _grid_mask(self, images): + # compute grid masks + masks = self._compute_grid_masks(images) - if self.fill_mode == "constant": - return tf.where(tf.cast(mask, tf.bool), image, self.fill_value) - else: - return mask * image - - def _augment_images(self, images): - unbatched = images.shape.rank == 3 + # convert masks to single-channel images + masks = tf.cast(masks, tf.uint8) + masks = tf.expand_dims(masks, axis=-1) - # The transform op only accepts rank 4 inputs, so if we have an unbatched - # image, we need to temporarily expand dims to a batch. - if unbatched: - images = tf.expand_dims(images, axis=0) + # randomly rotate masks + masks = self.random_rotate(masks) - # TODO: Make the batch operation vectorize. - output = tf.map_fn(lambda image: self._grid_mask(image), images) + # center crop masks + input_shape = tf.shape(images) + input_height = input_shape[1] + input_width = input_shape[2] + masks = self._center_crop(masks, input_width, input_height) - if unbatched: - output = tf.squeeze(output, axis=0) - return output + # convert back to boolean mask + masks = tf.cast(masks, tf.bool) - def call(self, images, training=None): + if self.fill_mode == "constant": + fill_value = tf.fill(input_shape, self.fill_value) + else: + # gaussian noise + fill_value = tf.random.normal(input_shape) + + return tf.where(masks, fill_value, images) + + # def _augment_images(self, images): + # unbatched = images.shape.rank == 3 + # + # # The transform op only accepts rank 4 inputs, so if we have an unbatched + # # image, we need to temporarily expand dims to a batch. + # if unbatched: + # images = tf.expand_dims(images, axis=0) + # + # output = self._grid_mask(images) + # + # if unbatched: + # output = tf.squeeze(output, axis=0) + # return output + + def call(self, images, training=True): """call method for the GridMask layer. 
Args: @@ -237,9 +293,10 @@ def call(self, images, training=None): if training is None: training = backend.learning_phase() - if not training: - return images - return self._augment_images(images) + if training: + images = self._grid_mask(images) + + return images def get_config(self): config = { From ac8fa28b9b91589b1eb6ed58d4482d50f2a6ad4c Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Tue, 15 Feb 2022 19:29:46 +0100 Subject: [PATCH 15/43] initial vectorized layer --- grid_mask_driver.py | 198 -------------------------------------------- 1 file changed, 198 deletions(-) delete mode 100644 grid_mask_driver.py diff --git a/grid_mask_driver.py b/grid_mask_driver.py deleted file mode 100644 index 069cf2b856..0000000000 --- a/grid_mask_driver.py +++ /dev/null @@ -1,198 +0,0 @@ -"""gridmask_demo.py shows how to use the GridMask preprocessing layer. - -Operates on the oxford_flowers102 dataset. In this script the flowers -are loaded, then are passed through the preprocessing layers. -Finally, they are shown using matplotlib. -""" - -import matplotlib.pyplot as plt -import tensorflow as tf -import tensorflow_datasets as tfds - -from keras_cv.utils import fill_utils -from keras_cv.layers import preprocessing - -RATIO = 0.6 -H_AXIS = -3 -W_AXIS = -2 - - -# def _compute_grid_masks(inputs): -# """Computes grid masks""" -# input_shape = tf.shape(inputs) -# batch_size = input_shape[0] -# height = tf.cast(input_shape[H_AXIS], tf.float32) -# width = tf.cast(input_shape[W_AXIS], tf.float32) -# -# # masks side length -# squared_w = tf.square(width) -# squared_h = tf.square(height) -# mask_side_length = tf.math.ceil(tf.sqrt(squared_w + squared_h)) -# mask_side_length = tf.cast(mask_side_length, tf.int32) -# -# # grid unit sizes -# unit_sizes = tf.random.uniform( -# shape=[batch_size], -# minval=tf.math.minimum(height * 0.5, width * 0.3), -# maxval=tf.math.maximum(height * 0.5, width * 0.3) + 1, -# ) -# rectangle_side_length = tf.cast((1 - RATIO) * unit_sizes, tf.int32) -# -# # x and y offsets for grid units -# delta_x = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) -# delta_y = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) -# delta_x = tf.cast(delta_x * unit_sizes, tf.int32) -# delta_y = tf.cast(delta_y * unit_sizes, tf.int32) -# -# # grid size (number of diagonal units per grid) -# unit_sizes = tf.cast(unit_sizes, tf.int32) -# grid_sizes = mask_side_length // unit_sizes + 1 -# max_grid_size = tf.reduce_max(grid_sizes) -# -# # grid size range per image -# grid_size_range = tf.range(1, max_grid_size + 1) -# grid_size_range = tf.tile(tf.expand_dims(grid_size_range, 0), [batch_size, 1]) -# -# # make broadcastable to grid size ranges -# delta_x = tf.expand_dims(delta_x, 1) -# delta_y = tf.expand_dims(delta_y, 1) -# unit_sizes = tf.expand_dims(unit_sizes, 1) -# rectangle_side_length = tf.expand_dims(rectangle_side_length, 1) -# -# # diagonal corner coordinates -# d_range = grid_size_range * unit_sizes -# x1 = d_range - delta_x -# x0 = x1 - rectangle_side_length -# y1 = d_range - delta_y -# y0 = y1 - rectangle_side_length -# -# # mask coordinates by grid ranges -# d_range_mask = tf.sequence_mask( -# lengths=grid_sizes, maxlen=max_grid_size, dtype=tf.int32 -# ) -# x1 = x1 * d_range_mask -# x0 = x0 * d_range_mask -# y1 = y1 * d_range_mask -# y0 = y0 * d_range_mask -# -# # mesh grid of diagonal top left corner coordinates for each image -# x0 = tf.tile(tf.expand_dims(x0, 1), [1, max_grid_size, 1]) -# y0 = tf.tile(tf.expand_dims(y0, 1), [1, max_grid_size, 1]) -# 
y0 = tf.transpose(y0, [0, 2, 1]) -# -# # mesh grid of diagonal bottom right corner coordinates for each image -# x1 = tf.tile(tf.expand_dims(x1, 1), [1, max_grid_size, 1]) -# y1 = tf.tile(tf.expand_dims(y1, 1), [1, max_grid_size, 1]) -# y1 = tf.transpose(y1, [0, 2, 1]) -# -# # flatten mesh grids -# x0 = tf.reshape(x0, [-1, max_grid_size]) -# y0 = tf.reshape(y0, [-1, max_grid_size]) -# x1 = tf.reshape(x1, [-1, max_grid_size]) -# y1 = tf.reshape(y1, [-1, max_grid_size]) -# -# # combine coordinates to (x0, y0, x1, y1) -# # with shape (num_rectangles_in_batch, 4) -# corners0 = tf.stack([x0, y0], axis=-1) -# corners1 = tf.stack([x1, y1], axis=-1) -# corners0 = tf.reshape(corners0, [-1, 2]) -# corners1 = tf.reshape(corners1, [-1, 2]) -# corners = tf.concat([corners0, corners1], axis=1) -# -# # make mask for each rectangle -# masks = fill_utils.rectangle_masks(corners, (mask_side_length, mask_side_length)) -# -# # reshape masks into shape -# # (batch_size, rectangles_per_image, mask_height, mask_width) -# masks = tf.reshape( -# masks, -# [-1, max_grid_size * max_grid_size, mask_side_length, mask_side_length], -# ) -# -# # combine rectangle masks per image -# masks = tf.reduce_any(masks, axis=1) -# -# return masks -# -# -# def _center_crop(masks, width, height): -# masks_shape = tf.shape(masks) -# h_diff = masks_shape[1] - height -# w_diff = masks_shape[2] - width -# -# h_start = tf.cast(h_diff / 2, tf.int32) -# w_start = tf.cast(w_diff / 2, tf.int32) -# return tf.image.crop_to_bounding_box(masks, h_start, w_start, height, width) - - -# %% -IMG_SIZE = (224, 224) -BATCH_SIZE = 64 - -# inputs = tf.ones((BATCH_SIZE, *IMG_SIZE, 3)) -# masks = _compute_grid_masks(inputs) -# -# # convert mask to single-channel image -# masks = tf.cast(masks, tf.uint8) -# masks = tf.expand_dims(masks, axis=-1) -# -# # randomly rotate masks -# rotate = tf.keras.layers.RandomRotation( -# factor=1.0, fill_mode="constant", fill_value=0.0 -# ) -# masks = rotate(masks) -# -# # center crop masks -# input_shape = tf.shape(inputs) -# input_height = input_shape[H_AXIS] -# input_width = input_shape[W_AXIS] -# masks = _center_crop(masks, input_width, input_height) -# -# # convert back to boolean mask -# masks = tf.cast(masks, tf.bool) - -#%% -def resize(image, label, num_classes=10): - image = tf.image.resize(image, IMG_SIZE) - label = tf.one_hot(label, num_classes) - return image, label - - -data, ds_info = tfds.load("oxford_flowers102", with_info=True, as_supervised=True) -train_ds = data["train"] - -num_classes = ds_info.features["label"].num_classes - -train_ds = ( - train_ds.map(lambda x, y: resize(x, y, num_classes=num_classes)) - .shuffle(10 * BATCH_SIZE) - .batch(BATCH_SIZE) -) - -gridmask = preprocessing.GridMask( - ratio=0.6, rotation_factor=0.5, fill_mode="gaussian_noise" -) -train_ds = train_ds.map( - lambda x, y: (gridmask(x, training=True), y), - num_parallel_calls=tf.data.AUTOTUNE, -) - -for images, labels in train_ds.take(1): - plt.figure(figsize=(8, 8)) - for i in range(9): - plt.subplot(3, 3, i + 1) - plt.imshow(images[i].numpy().astype("uint8")) - plt.axis("off") - plt.show() - -# x, y = next(iter(train_ds)) - -#%% -# images = gridmask(x) - -# plt.figure(figsize=(8, 8)) -# for i in range(9): -# plt.subplot(3, 3, i + 1) -# plt.imshow(images[i].numpy().astype("uint8")) -# plt.axis("off") -# plt.show() From 60759ce803abe964afeae82e70395e0d07a279d4 Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Tue, 15 Feb 2022 20:29:22 +0100 Subject: [PATCH 16/43] debug memory usage --- 
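Patch notes: this temporarily pins the demo's `ratio` to 0.1 while the memory
behaviour of the vectorized mask computation is investigated. One way to read
the allocator's high-water mark around a single call to the layer is sketched
below; the device string, batch size, and image shape are assumptions for
illustration (not part of this patch), and the memory-stats APIs require a
visible GPU:

    import tensorflow as tf

    from keras_cv.layers import preprocessing

    # Clear the peak counter so the reading reflects only this call.
    tf.config.experimental.reset_memory_stats("GPU:0")

    gridmask = preprocessing.GridMask(
        ratio=0.1, rotation_factor=0.5, fill_mode="gaussian_noise"
    )
    images = tf.random.uniform((64, 224, 224, 3), maxval=255.0)
    out = gridmask(images, training=True)

    # "peak" is the high-water mark of allocated bytes on the device.
    print(tf.config.experimental.get_memory_info("GPU:0")["peak"])
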
examples/layers/preprocessing/grid_mask_demo.py | 2 +- keras_cv/layers/preprocessing/grid_mask.py | 14 -------------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/examples/layers/preprocessing/grid_mask_demo.py b/examples/layers/preprocessing/grid_mask_demo.py index 4b54470536..e2dbdcd366 100644 --- a/examples/layers/preprocessing/grid_mask_demo.py +++ b/examples/layers/preprocessing/grid_mask_demo.py @@ -34,7 +34,7 @@ def main(): ) gridmask = preprocessing.GridMask( - ratio="random", rotation_factor=0.5, fill_mode="gaussian_noise" + ratio=0.1, rotation_factor=0.5, fill_mode="gaussian_noise" ) train_ds = train_ds.map( lambda x, y: (gridmask(x), y), diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index de6b829cf9..ee43b4eb54 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -265,20 +265,6 @@ def _grid_mask(self, images): return tf.where(masks, fill_value, images) - # def _augment_images(self, images): - # unbatched = images.shape.rank == 3 - # - # # The transform op only accepts rank 4 inputs, so if we have an unbatched - # # image, we need to temporarily expand dims to a batch. - # if unbatched: - # images = tf.expand_dims(images, axis=0) - # - # output = self._grid_mask(images) - # - # if unbatched: - # output = tf.squeeze(output, axis=0) - # return output - def call(self, images, training=True): """call method for the GridMask layer. From fb6cfcd2e8406e6c385ce62c58514eb011c7bb1d Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Tue, 15 Feb 2022 21:03:41 +0100 Subject: [PATCH 17/43] fix tests --- keras_cv/layers/preprocessing/grid_mask_test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/keras_cv/layers/preprocessing/grid_mask_test.py b/keras_cv/layers/preprocessing/grid_mask_test.py index 65deda642b..4cd8a7f368 100644 --- a/keras_cv/layers/preprocessing/grid_mask_test.py +++ b/keras_cv/layers/preprocessing/grid_mask_test.py @@ -36,7 +36,7 @@ def test_gridmask_call_results_one_channel(self): dtype=tf.float32, ) - fill_value = 0 + fill_value = 0.0 layer = GridMask( ratio=0.3, rotation_factor=(0.2, 0.3), @@ -60,7 +60,7 @@ def test_non_square_image(self): dtype=tf.float32, ) - fill_value = 100 + fill_value = 100.0 layer = GridMask( ratio=0.6, rotation_factor=0.3, fill_mode="constant", fill_value=fill_value ) @@ -78,7 +78,7 @@ def test_in_tf_function(self): dtype=tf.float32, ) - fill_value = 255 + fill_value = 255.0 layer = GridMask( ratio=0.4, rotation_factor=0.5, fill_mode="constant", fill_value=fill_value ) @@ -104,6 +104,7 @@ def test_in_single_image(self): layer = GridMask( ratio="random", fill_mode="gaussian_noise", + fill_value=0.0 ) xs = layer(xs, training=True) self.assertTrue(tf.math.reduce_any(xs == 0.0)) From 79b8387998d8cdf9d2cf3c4eb9e1f6c19e6215cc Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Wed, 16 Feb 2022 18:53:36 +0100 Subject: [PATCH 18/43] remove support for single image --- keras_cv/layers/preprocessing/grid_mask.py | 2 +- keras_cv/layers/preprocessing/grid_mask_test.py | 15 --------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index ee43b4eb54..9a01cc3ddf 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -77,7 +77,7 @@ def __init__( ratio="random", rotation_factor=0.15, fill_mode="constant", - fill_value=0, + fill_value=0.0, seed=None, 
**kwargs, ): diff --git a/keras_cv/layers/preprocessing/grid_mask_test.py b/keras_cv/layers/preprocessing/grid_mask_test.py index 4cd8a7f368..9c2ed672b9 100644 --- a/keras_cv/layers/preprocessing/grid_mask_test.py +++ b/keras_cv/layers/preprocessing/grid_mask_test.py @@ -94,18 +94,3 @@ def augment(x): self.assertTrue(tf.math.reduce_any(xs[0] == 2.0)) self.assertTrue(tf.math.reduce_any(xs[1] == float(fill_value))) self.assertTrue(tf.math.reduce_any(xs[1] == 1.0)) - - def test_in_single_image(self): - xs = tf.cast( - tf.ones((512, 512, 1)), - dtype=tf.float32, - ) - - layer = GridMask( - ratio="random", - fill_mode="gaussian_noise", - fill_value=0.0 - ) - xs = layer(xs, training=True) - self.assertTrue(tf.math.reduce_any(xs == 0.0)) - self.assertTrue(tf.math.reduce_any(xs == 1.0)) From 80c080ad0ac756b45b9f06d1d79484a69092840d Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Wed, 16 Feb 2022 21:10:21 +0100 Subject: [PATCH 19/43] set ratio random in demo --- examples/layers/preprocessing/grid_mask_demo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/layers/preprocessing/grid_mask_demo.py b/examples/layers/preprocessing/grid_mask_demo.py index e2dbdcd366..4b54470536 100644 --- a/examples/layers/preprocessing/grid_mask_demo.py +++ b/examples/layers/preprocessing/grid_mask_demo.py @@ -34,7 +34,7 @@ def main(): ) gridmask = preprocessing.GridMask( - ratio=0.1, rotation_factor=0.5, fill_mode="gaussian_noise" + ratio="random", rotation_factor=0.5, fill_mode="gaussian_noise" ) train_ds = train_ds.map( lambda x, y: (gridmask(x), y), From aa55af53bc5bc706bf231a7d4cb5df521139209d Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Sat, 19 Feb 2022 01:41:16 +0100 Subject: [PATCH 20/43] minimize memory by reducing number of simultaneous Logical And operations. 
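
Building all four comparison masks before combining them keeps four boolean
tensors of shape (num_rectangles, mask_height, mask_width) alive at the same
time; folding each comparison into a running result lets the runtime release
each intermediate as soon as it has been consumed. A standalone sketch of the
folded pattern, with toy `corners` values and a 5x5 mask chosen purely for
illustration (the real version is `fill_utils.rectangle_masks`):

    import tensorflow as tf

    # Two rectangles given as (x0, y0, x1, y1) corners on a 5x5 mask.
    corners = tf.constant([[0.0, 0.0, 2.0, 2.0], [1.0, 1.0, 4.0, 3.0]])
    x0, y0, x1, y1 = tf.split(corners, 4, axis=1)  # each of shape (2, 1)

    # Broadcastable row/column index grids.
    range_row = tf.cast(tf.range(5)[tf.newaxis, :, tf.newaxis], tf.float32)
    range_col = tf.cast(tf.range(5)[tf.newaxis, tf.newaxis, :], tf.float32)

    # Fold each comparison into the running mask so earlier intermediates
    # can be freed; the final result is one (2, 5, 5) boolean tensor where
    # cell (row, col) is True iff x0 <= col < x1 and y0 <= row < y1.
    masks = tf.less_equal(x0[..., tf.newaxis], range_col)
    masks = masks & tf.less_equal(y0[..., tf.newaxis], range_row)
    masks = masks & tf.less(range_col, x1[..., tf.newaxis])
    masks = masks & tf.less(range_row, y1[..., tf.newaxis])
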
--- keras_cv/utils/fill_utils.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/keras_cv/utils/fill_utils.py b/keras_cv/utils/fill_utils.py index 39933ead2d..193f2fc5b8 100644 --- a/keras_cv/utils/fill_utils.py +++ b/keras_cv/utils/fill_utils.py @@ -53,12 +53,10 @@ def rectangle_masks(corners, mask_shape): range_col = tf.repeat(range_col[tf.newaxis, tf.newaxis, :], batch_size, 0) # boolean masks - mask_x0 = tf.less_equal(x0_rep, range_col) - mask_y0 = tf.less_equal(y0_rep, range_row) - mask_x1 = tf.less(range_col, x1_rep) - mask_y1 = tf.less(range_row, y1_rep) - - masks = mask_x0 & mask_y0 & mask_x1 & mask_y1 + masks = tf.less_equal(x0_rep, range_col) + masks = masks & tf.less_equal(y0_rep, range_row) + masks = masks & tf.less(range_col, x1_rep) + masks = masks & tf.less(range_row, y1_rep) return masks From a1d5f0320274d579d68cb07474e3fc7359a4269d Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Sat, 19 Feb 2022 03:44:08 +0100 Subject: [PATCH 21/43] individual ratio for each image when ratio="random" --- keras_cv/layers/preprocessing/grid_mask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index 9a01cc3ddf..2eaea2b029 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -144,7 +144,7 @@ def _compute_grid_masks(self, inputs): ) if self.ratio == "random": ratio = tf.random.uniform( - shape=[], minval=0, maxval=1, dtype=tf.float32, seed=self.seed + shape=[batch_size], minval=0, maxval=1, dtype=tf.float32, seed=self.seed ) else: ratio = self.ratio From 3e8d0c472b8e90155c9cdfc4d19d87b848b07be5 Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Sat, 19 Feb 2022 04:08:07 +0100 Subject: [PATCH 22/43] add vectorized argument --- keras_cv/layers/preprocessing/grid_mask.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index 2eaea2b029..8d0da2c15f 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -78,6 +78,7 @@ def __init__( rotation_factor=0.15, fill_mode="constant", fill_value=0.0, + vectorized=True, seed=None, **kwargs, ): @@ -90,6 +91,7 @@ def __init__( self.random_rotate = layers.RandomRotation( factor=rotation_factor, fill_mode="constant", fill_value=0.0, seed=seed ) + self.vectorized = vectorized self.seed = seed self._check_parameter_values() @@ -239,7 +241,15 @@ def _center_crop(self, masks, width, height): def _grid_mask(self, images): # compute grid masks - masks = self._compute_grid_masks(images) + if self.vectorized: + masks = self._compute_grid_masks(images) + else: + masks = tf.map_fn( + self._compute_grid_masks, + tf.expand_dims(images, 1), + fn_output_signature=tf.TensorSpec(shape=(1, None, None), dtype=tf.bool), + ) + masks = tf.squeeze(masks, 1) # convert masks to single-channel images masks = tf.cast(masks, tf.uint8) @@ -290,6 +300,7 @@ def get_config(self): "rotation_factor": self.rotation_factor, "fill_mode": self.fill_mode, "fill_value": self.fill_value, + "vectorized": self.vectorized, "seed": self.seed, } base_config = super().get_config() From 4a033ab88a89211b71489a0afa57c49b4dfcb5ae Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Tue, 22 Feb 2022 02:09:53 +0100 Subject: [PATCH 23/43] use float32 instead of int32 --- keras_cv/layers/preprocessing/grid_mask.py | 14 +++++++------- 1 file 
changed, 7 insertions(+), 7 deletions(-) diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index 8d0da2c15f..4c4dab2df7 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -136,13 +136,13 @@ def _compute_grid_masks(self, inputs): squared_w = tf.square(width) squared_h = tf.square(height) mask_side_length = tf.math.ceil(tf.sqrt(squared_w + squared_h)) - mask_side_length = tf.cast(mask_side_length, tf.int32) # grid unit sizes unit_sizes = tf.random.uniform( shape=[batch_size], minval=tf.math.minimum(height * 0.5, width * 0.3), maxval=tf.math.maximum(height * 0.5, width * 0.3) + 1, + dtype=tf.float32 ) if self.ratio == "random": ratio = tf.random.uniform( @@ -150,16 +150,15 @@ def _compute_grid_masks(self, inputs): ) else: ratio = self.ratio - rectangle_side_length = tf.cast((1 - ratio) * unit_sizes, tf.int32) + rectangle_side_length = tf.cast((1 - ratio) * unit_sizes, tf.float32) # x and y offsets for grid units delta_x = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) delta_y = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) - delta_x = tf.cast(delta_x * unit_sizes, tf.int32) - delta_y = tf.cast(delta_y * unit_sizes, tf.int32) + delta_x = delta_x * unit_sizes + delta_y = delta_y * unit_sizes # grid size (number of diagonal units per grid) - unit_sizes = tf.cast(unit_sizes, tf.int32) grid_sizes = mask_side_length // unit_sizes + 1 max_grid_size = tf.reduce_max(grid_sizes) @@ -182,7 +181,7 @@ def _compute_grid_masks(self, inputs): # mask coordinates by grid ranges d_range_mask = tf.sequence_mask( - lengths=grid_sizes, maxlen=max_grid_size, dtype=tf.int32 + lengths=grid_sizes, maxlen=max_grid_size, dtype=tf.float32 ) x1 = x1 * d_range_mask x0 = x0 * d_range_mask @@ -214,6 +213,7 @@ def _compute_grid_masks(self, inputs): corners = tf.concat([corners0, corners1], axis=1) # make mask for each rectangle + mask_side_length = tf.cast(mask_side_length, tf.int32) masks = fill_utils.rectangle_masks( corners, (mask_side_length, mask_side_length) ) @@ -252,7 +252,7 @@ def _grid_mask(self, images): masks = tf.squeeze(masks, 1) # convert masks to single-channel images - masks = tf.cast(masks, tf.uint8) + masks = tf.cast(masks, tf.float32) masks = tf.expand_dims(masks, axis=-1) # randomly rotate masks From de60f963ff9ca2fcae3cceeb55641aa33ece5d6e Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Thu, 24 Feb 2022 23:36:32 +0100 Subject: [PATCH 24/43] use float32 instead of int32 --- keras_cv/layers/preprocessing/grid_mask.py | 1 + 1 file changed, 1 insertion(+) diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index 4c4dab2df7..2baf106790 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -220,6 +220,7 @@ def _compute_grid_masks(self, inputs): # reshape masks into shape # (batch_size, rectangles_per_image, mask_height, mask_width) + mask_side_length = tf.cast(mask_side_length, tf.float32) masks = tf.reshape( masks, [-1, max_grid_size * max_grid_size, mask_side_length, mask_side_length], From 48df6668ea57093c9b22f65eff90a9792eb8c67c Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Thu, 24 Feb 2022 23:38:45 +0100 Subject: [PATCH 25/43] remove vectorized arg --- keras_cv/layers/preprocessing/grid_mask.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/keras_cv/layers/preprocessing/grid_mask.py 
b/keras_cv/layers/preprocessing/grid_mask.py index 2baf106790..4e72a6ac01 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -78,7 +78,6 @@ def __init__( rotation_factor=0.15, fill_mode="constant", fill_value=0.0, - vectorized=True, seed=None, **kwargs, ): @@ -91,7 +90,6 @@ def __init__( self.random_rotate = layers.RandomRotation( factor=rotation_factor, fill_mode="constant", fill_value=0.0, seed=seed ) - self.vectorized = vectorized self.seed = seed self._check_parameter_values() @@ -242,15 +240,7 @@ def _center_crop(self, masks, width, height): def _grid_mask(self, images): # compute grid masks - if self.vectorized: - masks = self._compute_grid_masks(images) - else: - masks = tf.map_fn( - self._compute_grid_masks, - tf.expand_dims(images, 1), - fn_output_signature=tf.TensorSpec(shape=(1, None, None), dtype=tf.bool), - ) - masks = tf.squeeze(masks, 1) + masks = self._compute_grid_masks(images) # convert masks to single-channel images masks = tf.cast(masks, tf.float32) @@ -301,7 +291,6 @@ def get_config(self): "rotation_factor": self.rotation_factor, "fill_mode": self.fill_mode, "fill_value": self.fill_value, - "vectorized": self.vectorized, "seed": self.seed, } base_config = super().get_config() From a9f8700bcf28cfddaa207e1e6a58207e956a6b9c Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Fri, 25 Feb 2022 00:30:46 +0100 Subject: [PATCH 26/43] minor refactor --- keras_cv/layers/preprocessing/grid_mask.py | 41 +++++++++++----------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index 4e72a6ac01..d3c180c3d6 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -123,24 +123,25 @@ def _check_parameter_values(self): allow_callables=False, ) - def _compute_grid_masks(self, inputs): + def _compute_rectangle_coordinates(self, unit_sizes, mask_side_len): + pass + + def _compute_grid_masks(self, input_shape): """Computes grid masks""" - input_shape = tf.shape(inputs) batch_size = input_shape[0] height = tf.cast(input_shape[1], tf.float32) width = tf.cast(input_shape[2], tf.float32) # masks side length - squared_w = tf.square(width) - squared_h = tf.square(height) - mask_side_length = tf.math.ceil(tf.sqrt(squared_w + squared_h)) + input_diagonal_len = tf.sqrt(tf.square(width) + tf.square(height)) + mask_side_len = tf.math.ceil(input_diagonal_len) # grid unit sizes unit_sizes = tf.random.uniform( shape=[batch_size], minval=tf.math.minimum(height * 0.5, width * 0.3), maxval=tf.math.maximum(height * 0.5, width * 0.3) + 1, - dtype=tf.float32 + dtype=tf.float32, ) if self.ratio == "random": ratio = tf.random.uniform( @@ -148,7 +149,7 @@ def _compute_grid_masks(self, inputs): ) else: ratio = self.ratio - rectangle_side_length = tf.cast((1 - ratio) * unit_sizes, tf.float32) + rectangle_side_len = tf.cast((1 - ratio) * unit_sizes, tf.float32) # x and y offsets for grid units delta_x = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) @@ -157,7 +158,7 @@ def _compute_grid_masks(self, inputs): delta_y = delta_y * unit_sizes # grid size (number of diagonal units per grid) - grid_sizes = mask_side_length // unit_sizes + 1 + grid_sizes = mask_side_len // unit_sizes + 1 max_grid_size = tf.reduce_max(grid_sizes) # grid size range per image @@ -168,16 +169,17 @@ def _compute_grid_masks(self, inputs): delta_x = tf.expand_dims(delta_x, 1) delta_y = tf.expand_dims(delta_y, 1) 
unit_sizes = tf.expand_dims(unit_sizes, 1) - rectangle_side_length = tf.expand_dims(rectangle_side_length, 1) + rectangle_side_len = tf.expand_dims(rectangle_side_len, 1) # diagonal corner coordinates d_range = grid_size_range * unit_sizes x1 = d_range - delta_x - x0 = x1 - rectangle_side_length + x0 = x1 - rectangle_side_len y1 = d_range - delta_y - y0 = y1 - rectangle_side_length + y0 = y1 - rectangle_side_len - # mask coordinates by grid ranges + # not every input has the same grid size (its random), + # so we mask out some of the coordinates for smaller grids. d_range_mask = tf.sequence_mask( lengths=grid_sizes, maxlen=max_grid_size, dtype=tf.float32 ) @@ -211,17 +213,15 @@ def _compute_grid_masks(self, inputs): corners = tf.concat([corners0, corners1], axis=1) # make mask for each rectangle - mask_side_length = tf.cast(mask_side_length, tf.int32) - masks = fill_utils.rectangle_masks( - corners, (mask_side_length, mask_side_length) - ) + mask_side_len = tf.cast(mask_side_len, tf.int32) + masks = fill_utils.rectangle_masks(corners, (mask_side_len, mask_side_len)) # reshape masks into shape # (batch_size, rectangles_per_image, mask_height, mask_width) - mask_side_length = tf.cast(mask_side_length, tf.float32) + mask_side_len = tf.cast(mask_side_len, tf.float32) masks = tf.reshape( masks, - [-1, max_grid_size * max_grid_size, mask_side_length, mask_side_length], + [-1, max_grid_size * max_grid_size, mask_side_len, mask_side_len], ) # combine rectangle masks per image @@ -239,8 +239,10 @@ def _center_crop(self, masks, width, height): return tf.image.crop_to_bounding_box(masks, h_start, w_start, height, width) def _grid_mask(self, images): + input_shape = tf.shape(images) + # compute grid masks - masks = self._compute_grid_masks(images) + masks = self._compute_grid_masks(input_shape) # convert masks to single-channel images masks = tf.cast(masks, tf.float32) @@ -250,7 +252,6 @@ def _grid_mask(self, images): masks = self.random_rotate(masks) # center crop masks - input_shape = tf.shape(images) input_height = input_shape[1] input_width = input_shape[2] masks = self._center_crop(masks, input_width, input_height) From bbf897736b26d1e6e57b6f5e55e3439b342fd500 Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Fri, 25 Feb 2022 01:10:09 +0100 Subject: [PATCH 27/43] minor refactor --- keras_cv/layers/preprocessing/grid_mask.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index d3c180c3d6..e460ebd56c 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -123,9 +123,6 @@ def _check_parameter_values(self): allow_callables=False, ) - def _compute_rectangle_coordinates(self, unit_sizes, mask_side_len): - pass - def _compute_grid_masks(self, input_shape): """Computes grid masks""" batch_size = input_shape[0] From 54416ff9b5652c3dc4a1510757481568fb12e3cd Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Fri, 25 Feb 2022 01:17:04 +0100 Subject: [PATCH 28/43] support single image --- keras_cv/layers/preprocessing/grid_mask.py | 7 ++++++- keras_cv/layers/preprocessing/grid_mask_test.py | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index e460ebd56c..e73eadeec1 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -279,7 +279,12 @@ def call(self, images, training=True): training 
= backend.learning_phase() if training: - images = self._grid_mask(images) + if images.shape.rank == 3: + images = tf.expand_dims(images, axis=0) + images = self._grid_mask(images) + images = tf.squeeze(images, axis=0) + else: + images = self._grid_mask(images) return images diff --git a/keras_cv/layers/preprocessing/grid_mask_test.py b/keras_cv/layers/preprocessing/grid_mask_test.py index 9c2ed672b9..4e5f732af3 100644 --- a/keras_cv/layers/preprocessing/grid_mask_test.py +++ b/keras_cv/layers/preprocessing/grid_mask_test.py @@ -94,3 +94,18 @@ def augment(x): self.assertTrue(tf.math.reduce_any(xs[0] == 2.0)) self.assertTrue(tf.math.reduce_any(xs[1] == float(fill_value))) self.assertTrue(tf.math.reduce_any(xs[1] == 1.0)) + + def test_in_single_image(self): + xs = tf.cast( + tf.ones((512, 512, 1)), + dtype=tf.float32, + ) + + layer = GridMask( + ratio="random", + fill_mode="constant", + fill_value=0.0 + ) + xs = layer(xs, training=True) + self.assertTrue(tf.math.reduce_any(xs == 0.0)) + self.assertTrue(tf.math.reduce_any(xs == 1.0)) \ No newline at end of file From 783b5c42c1489553966263d36eeed898463a8e4c Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Sat, 26 Feb 2022 02:54:32 +0100 Subject: [PATCH 29/43] refactor coordinates to mask --- keras_cv/utils/fill_utils.py | 53 ++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/keras_cv/utils/fill_utils.py b/keras_cv/utils/fill_utils.py index 193f2fc5b8..1d46ff6288 100644 --- a/keras_cv/utils/fill_utils.py +++ b/keras_cv/utils/fill_utils.py @@ -16,6 +16,59 @@ from keras_cv.utils import bbox +def _axis_mask(axis_lengths, offsets, mask_len): + axis_mask = tf.sequence_mask(axis_lengths, mask_len) + rev_lengths = tf.minimum(offsets + axis_lengths, mask_len) + axis_mask = tf.reverse_sequence(axis_mask, rev_lengths, seq_axis=1) + return axis_mask + + +def xywh_to_mask(xywh, mask_shape): + width, height = mask_shape + cx = xywh[:, 0] + cy = xywh[:, 1] + w = xywh[:, 2] + h = xywh[:, 3] + x0 = cx - (w / 2) + y0 = cy - (h / 2) + + w = tf.cast(w, tf.int32) + h = tf.cast(h, tf.int32) + x0 = tf.cast(x0, tf.int32) + y0 = tf.cast(y0, tf.int32) + w_mask = _axis_mask(w, x0, width) + h_mask = _axis_mask(h, y0, height) + + w_mask = tf.expand_dims(w_mask, axis=-2) + h_mask = tf.expand_dims(h_mask, axis=-1) + masks = tf.logical_and(w_mask, h_mask) + + return masks + + +def corners_to_mask(corners, mask_shape): + width, height = mask_shape + x0 = corners[:, 0] + y0 = corners[:, 1] + x1 = corners[:, 2] + y1 = corners[:, 3] + w = x1 - x0 + h = y1 - y0 + + w = tf.cast(w, tf.int32) + h = tf.cast(h, tf.int32) + x0 = tf.cast(x0, tf.int32) + y0 = tf.cast(y0, tf.int32) + w_mask = _axis_mask(w, x0, width) + h_mask = _axis_mask(h, y0, height) + + w_mask = tf.expand_dims(w_mask, axis=-2) + h_mask = tf.expand_dims(h_mask, axis=-1) + masks = tf.logical_and(w_mask, h_mask) + + return masks + + def rectangle_masks(corners, mask_shape): """Computes masks of rectangles From 25f706b13b2eeeeb11ed7355d12fc47f204340ea Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Sat, 26 Feb 2022 18:15:53 +0100 Subject: [PATCH 30/43] merge master --- .github/API_DESIGN.md | 12 +- keras_cv/layers/preprocessing/cut_mix.py | 4 +- keras_cv/layers/preprocessing/grid_mask.py | 14 +- .../layers/preprocessing/random_cutout.py | 18 +- keras_cv/metrics/__init__.py | 1 + keras_cv/metrics/coco/__init__.py | 1 + .../metrics/coco/mean_average_precision.py | 317 ++++++++++++++++++ .../coco/mean_average_precision_test.py | 160 +++++++++ 
 .../numerical_tests/GenerateSamples.ipynb | 44 +--
 .../mean_average_precision_test.py | 131 ++++++++
 .../recall_correctness_test.py | 6 +-
 keras_cv/metrics/coco/recall.py | 14 +-
 keras_cv/metrics/coco/utils.py | 39 +--
 keras_cv/metrics/coco/utils_test.py | 100 +++---
 keras_cv/utils/__init__.py | 2 +-
 keras_cv/utils/bbox_test.py | 94 ------
 keras_cv/utils/{bbox.py => bounding_box.py} | 57 ++--
 keras_cv/utils/bounding_box_test.py | 116 +++++++
 keras_cv/utils/fill_utils.py | 4 +-
 keras_cv/utils/iou.py | 7 +-
 20 files changed, 897 insertions(+), 244 deletions(-)
 create mode 100644 keras_cv/metrics/coco/mean_average_precision.py
 create mode 100644 keras_cv/metrics/coco/mean_average_precision_test.py
 create mode 100644 keras_cv/metrics/coco/numerical_tests/mean_average_precision_test.py
 delete mode 100644 keras_cv/utils/bbox_test.py
 rename keras_cv/utils/{bbox.py => bounding_box.py} (65%)
 create mode 100644 keras_cv/utils/bounding_box_test.py

diff --git a/.github/API_DESIGN.md b/.github/API_DESIGN.md
index edc504727d..ec300b3099 100644
--- a/.github/API_DESIGN.md
+++ b/.github/API_DESIGN.md
@@ -4,12 +4,20 @@ In general, KerasCV abides to the [API design guidelines of Keras](https://gith
 There are a few API guidelines that apply only to KerasCV. These are discussed
 in this document.
 
+## Label Names
+When working with `bounding_box` and `segmentation_map` labels, the abbreviations `bbox` and
+`segm` are often used. In KerasCV, we will *not* be using these abbreviations. This is done
+to ensure full consistency in our naming convention. While the team is fond of the abbreviation
+`bbox`, we are less fond of `segm`. In order to ensure full consistency, we have decided to
+use the full names for label types in our code base.
+
 ## Preprocessing Layers
 ### Color Based Preprocessing Layers
 Some preprocessing layers in KerasCV perform color based transformations. This
 includes `RandomBrightness`, `Equalize`, `Solarization`, and more.
 Preprocessing layers that perform color based transformations make the
 following assumptions:
-- input images are represented in pixel space, with values in the range [0,255]
+- these layers must accept a `value_range`, which is a tuple of numbers.
+- `value_range` must default to `(0, 255)`
 - input images may be of any `dtype`
 
 Additionally, these preprocessing layers should cast back to the input images
@@ -22,4 +30,4 @@ some Keras layers cast user inputs without the user knowing. For example, if
 was accidentally casting inputs to `float32`, it would be a bad user experience
 to raise an error asserting that all inputs must be of type `int`.
 
-New preprocessing layers should be consistent with these decisions.
\ No newline at end of file
+New preprocessing layers should be consistent with these decisions.
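
The `value_range` contract spelled out above is easiest to see as a layer
skeleton. Below is a hypothetical color-based layer honoring it; the class
name and the brightness math are illustrative only, not a layer added by
this change:

    import tensorflow as tf

    class RandomBrightnessLike(tf.keras.layers.Layer):
        """Sketch of a color transformation obeying the value_range contract."""

        def __init__(self, factor, value_range=(0, 255), **kwargs):
            super().__init__(**kwargs)
            self.factor = factor
            self.value_range = value_range

        def call(self, images):
            low, high = self.value_range
            # Normalize into [0, 1] regardless of the caller's pixel space.
            scaled = (tf.cast(images, tf.float32) - low) / (high - low)
            adjusted = tf.clip_by_value(scaled + self.factor, 0.0, 1.0)
            # Map back to the input range and cast back to the input dtype,
            # so callers get out exactly the representation they passed in.
            return tf.cast(adjusted * (high - low) + low, images.dtype)
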
diff --git a/keras_cv/layers/preprocessing/cut_mix.py b/keras_cv/layers/preprocessing/cut_mix.py index 65dd49a1a1..65ba3da2b0 100644 --- a/keras_cv/layers/preprocessing/cut_mix.py +++ b/keras_cv/layers/preprocessing/cut_mix.py @@ -105,8 +105,8 @@ def _cutmix(self, images, labels): shape=[batch_size], minval=0, maxval=image_width, dtype=tf.int32 ) - bbox_area = cut_height * cut_width - lambda_sample = 1.0 - bbox_area / (image_height * image_width) + bounding_box_area = cut_height * cut_width + lambda_sample = 1.0 - bounding_box_area / (image_height * image_width) lambda_sample = tf.cast(lambda_sample, dtype=tf.float32) images = fill_utils.fill_rectangle( diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index e73eadeec1..41140a6f70 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -15,7 +15,6 @@ import tensorflow as tf from tensorflow.keras import backend from tensorflow.keras import layers -from tensorflow.python.keras.utils import layer_utils from keras_cv.utils import fill_utils @@ -114,14 +113,11 @@ def _check_parameter_values(self): f"fill_value should be in the range [0, 255]. Got {fill_value}" ) - layer_utils.validate_string_arg( - fill_mode, - allowable_strings=["constant", "gaussian_noise"], - layer_name="GridMask", - arg_name="fill_mode", - allow_none=False, - allow_callables=False, - ) + if fill_mode not in ["constant", "gaussian_noise", "random"]: + raise ValueError( + '`fill_mode` should be "constant", ' + f'"gaussian_noise", or "random". Got `fill_mode`={fill_mode}' + ) def _compute_grid_masks(self, input_shape): """Computes grid masks""" diff --git a/keras_cv/layers/preprocessing/random_cutout.py b/keras_cv/layers/preprocessing/random_cutout.py index 6cc5706f42..7944022ceb 100644 --- a/keras_cv/layers/preprocessing/random_cutout.py +++ b/keras_cv/layers/preprocessing/random_cutout.py @@ -14,7 +14,6 @@ import tensorflow as tf import tensorflow.keras.layers as layers from tensorflow.keras import backend -from tensorflow.python.keras.utils import layer_utils from keras_cv.utils import fill_utils @@ -63,22 +62,19 @@ def __init__( fill_mode="constant", fill_value=0.0, seed=None, - **kwargs + **kwargs, ): super().__init__(**kwargs) - layer_utils.validate_string_arg( - fill_mode, - allowable_strings=["constant", "gaussian_noise"], - layer_name="RandomCutout", - arg_name="fill_mode", - allow_none=False, - allow_callables=False, - ) - self.height_lower, self.height_upper = self._parse_bounds(height_factor) self.width_lower, self.width_upper = self._parse_bounds(width_factor) + if fill_mode not in ["gaussian_noise", "constant"]: + raise ValueError( + '`fill_mode` should be "gaussian_noise" ' + f'or "constant". Got `fill_mode`={fill_mode}' + ) + if not isinstance(self.height_lower, type(self.height_upper)): raise ValueError( "`height_factor` must have lower bound and upper bound " diff --git a/keras_cv/metrics/__init__.py b/keras_cv/metrics/__init__.py index 13b9c0007a..593d3dbb93 100644 --- a/keras_cv/metrics/__init__.py +++ b/keras_cv/metrics/__init__.py @@ -12,4 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from keras_cv.metrics.coco.mean_average_precision import COCOMeanAveragePrecision
 from keras_cv.metrics.coco.recall import COCORecall
diff --git a/keras_cv/metrics/coco/__init__.py b/keras_cv/metrics/coco/__init__.py
index 13b9c0007a..593d3dbb93 100644
--- a/keras_cv/metrics/coco/__init__.py
+++ b/keras_cv/metrics/coco/__init__.py
@@ -12,4 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from keras_cv.metrics.coco.mean_average_precision import COCOMeanAveragePrecision
 from keras_cv.metrics.coco.recall import COCORecall
diff --git a/keras_cv/metrics/coco/mean_average_precision.py b/keras_cv/metrics/coco/mean_average_precision.py
new file mode 100644
index 0000000000..17f6689ec0
--- /dev/null
+++ b/keras_cv/metrics/coco/mean_average_precision.py
@@ -0,0 +1,317 @@
+# Copyright 2022 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import tensorflow as tf
+
+from keras_cv.metrics.coco import utils
+from keras_cv.utils import bounding_box
+from keras_cv.utils import iou as iou_lib
+
+
+class COCOMeanAveragePrecision(tf.keras.metrics.Metric):
+    """COCOMeanAveragePrecision computes an approximation of MaP.
+
+    Args:
+        class_ids: The class IDs to evaluate the metric for. To evaluate for
+            all classes over a set of sequentially labelled classes, pass
+            `range(num_classes)`.
+        iou_thresholds: IoU thresholds over which to evaluate the metric. Must
+            be a tuple of floats, defaults to [0.5:0.05:0.95].
+        area_range: area range to constrict the considered bounding boxes in
+            metric computation. Defaults to `None`, which makes the metric
+            count all bounding boxes. Must be a tuple of floats. The first
+            number in the tuple represents a lower bound for areas, while the
+            second value represents an upper bound. For example, when
+            `(0, 32**2)` is passed, the metric is only evaluated for
+            objects with areas less than `32*32`. If `(32**2, 1000000**2)` is
+            passed the metric will only be evaluated for boxes with areas larger
+            than `32**2`, and smaller than `1000000**2`.
+        max_detections: maximum number of detections a model is allowed to
+            make. Must be an integer, defaults to `100`.
+        recall_thresholds: The list of thresholds to average over in the MaP
+            computation. List of floats. Defaults to [0:.01:1].
+        num_buckets: num_buckets is used to select the number of confidence
+            buckets predictions are placed into. Instead of computing MaP
+            over each incrementally selected set of bounding boxes, we instead
+            place them into buckets. This makes distributed computation easier.
+            Increasing buckets improves accuracy of the metric, while decreasing
+            buckets improves performance. This is a tradeoff you must weigh
+            for your use case. Defaults to 10,000, which is sufficiently large
+            for most use cases.
+
+    Usage:
+
+    COCOMeanAveragePrecision accepts two Tensors as input to its
+    `update_state()` method. These Tensors represent bounding boxes in
+    `corners` format.
Utilities to convert Tensors from `xywh` to `corners` + format can be found in `keras_cv.utils.bounding_box`. + + Each image in a dataset may have a different number of bounding boxes, + both in the ground truth dataset and the prediction set. In order to + account for this, you may either pass a `tf.RaggedTensor`, or pad Tensors + with `-1`s to indicate unused boxes. A utility function to perform this + padding is available at + `keras_cv_.utils.bounding_box.pad_bounding_box_batch_to_shape()`. + + ```python + coco_map = keras_cv.metrics.COCOMeanAveragePrecision( + max_detections=100, + class_ids=[1] + ) + + y_true = np.array([[[0, 0, 10, 10, 1], [20, 20, 10, 10, 1]]]).astype(np.float32) + y_pred = np.array([[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.9]]]).astype( + np.float32 + ) + coco_map.update_state(y_true, y_pred) + coco_map.result() + # 0.24752477 + ``` + """ + + def __init__( + self, + class_ids, + recall_thresholds=None, + iou_thresholds=None, + area_range=None, + max_detections=100, + num_buckets=10000, + **kwargs + ): + super().__init__(**kwargs) + # Initialize parameter values + self.iou_thresholds = iou_thresholds or [x / 100.0 for x in range(50, 100, 5)] + self.area_range = area_range + self.max_detections = max_detections + self.class_ids = class_ids + self.recall_thresholds = recall_thresholds or [x / 100 for x in range(0, 101)] + self.num_buckets = num_buckets + + self.num_iou_thresholds = len(self.iou_thresholds) + self.num_class_ids = len(self.class_ids) + + self.ground_truths = self.add_weight( + "ground_truths", + shape=(self.num_class_ids,), + dtype=tf.int32, + initializer="zeros", + ) + self.true_positive_buckets = self.add_weight( + "true_positive_buckets", + shape=( + self.num_class_ids, + self.num_iou_thresholds, + self.num_buckets, + ), + dtype=tf.int32, + initializer="zeros", + ) + self.false_positive_buckets = self.add_weight( + "false_positive_buckets", + shape=( + self.num_class_ids, + self.num_iou_thresholds, + self.num_buckets, + ), + dtype=tf.int32, + initializer="zeros", + ) + + def reset_state(self): + self.true_positive_buckets.assign(tf.zeros_like(self.true_positive_buckets)) + self.false_positive_buckets.assign(tf.zeros_like(self.false_positive_buckets)) + self.ground_truths.assign(tf.zeros_like(self.ground_truths)) + + @tf.function() + def update_state(self, y_true, y_pred, sample_weight=None): + num_images = tf.shape(y_true)[0] + + if sample_weight is not None: + raise ValueError( + "COCOMeanAveragePrecision does not support `sample_weight`" + ) + + y_pred = utils.sort_bounding_boxes(y_pred, axis=bounding_box.CONFIDENCE) + + ground_truth_boxes_update = tf.zeros_like(self.ground_truths) + true_positive_buckets_update = tf.zeros_like(self.true_positive_buckets) + false_positive_buckets_update = tf.zeros_like(self.false_positive_buckets) + + for img in tf.range(num_images): + ground_truths = utils.filter_out_sentinels(y_true[img]) + detections = utils.filter_out_sentinels(y_pred[img]) + + if self.area_range is not None: + ground_truths = utils.filter_boxes_by_area_range( + ground_truths, self.area_range[0], self.area_range[1] + ) + detections = utils.filter_boxes_by_area_range( + detections, self.area_range[0], self.area_range[1] + ) + + detections = detections + if self.max_detections < tf.shape(detections)[0]: + detections = detections[: self.max_detections] + + true_positives_update = tf.TensorArray( + tf.int32, size=self.num_class_ids * self.num_iou_thresholds + ) + false_positives_update = tf.TensorArray( + tf.int32, size=self.num_class_ids * 
self.num_iou_thresholds + ) + ground_truths_update = tf.TensorArray(tf.int32, size=self.num_class_ids) + + for c_i in range(self.num_class_ids): + category_id = self.class_ids[c_i] + ground_truths = utils.filter_boxes( + ground_truths, value=category_id, axis=bounding_box.CLASS + ) + + detections = utils.filter_boxes( + detections, value=category_id, axis=bounding_box.CLASS + ) + if self.max_detections < tf.shape(detections)[0]: + detections = detections[: self.max_detections] + + ground_truths_update = ground_truths_update.write( + c_i, tf.shape(ground_truths)[0] + ) + + ious = iou_lib.compute_ious_for_image(ground_truths, detections) + + for iou_i in range(self.num_iou_thresholds): + iou_threshold = self.iou_thresholds[iou_i] + pred_matches = utils.match_boxes(ious, iou_threshold) + + dt_scores = detections[:, bounding_box.CONFIDENCE] + + true_positives = pred_matches != -1 + false_positives = pred_matches == -1 + + # We must divide by 1.01 to prevent off by one errors. + confidence_buckets = tf.cast( + tf.math.floor(self.num_buckets * (dt_scores / 1.01)), tf.int32 + ) + true_positives_by_bucket = tf.gather_nd( + confidence_buckets, indices=tf.where(true_positives) + ) + false_positives_by_bucket = tf.gather_nd( + confidence_buckets, indices=tf.where(false_positives) + ) + + true_positive_counts_per_bucket = tf.math.bincount( + true_positives_by_bucket, + minlength=self.num_buckets, + maxlength=self.num_buckets, + ) + false_positives_counts_per_bucket = tf.math.bincount( + false_positives_by_bucket, + minlength=self.num_buckets, + maxlength=self.num_buckets, + ) + + true_positives_update = true_positives_update.write( + (self.num_iou_thresholds * c_i) + iou_i, + true_positive_counts_per_bucket, + ) + false_positives_update = false_positives_update.write( + (self.num_iou_thresholds * c_i) + iou_i, + false_positives_counts_per_bucket, + ) + + true_positives_update = tf.reshape( + true_positives_update.stack(), + (self.num_class_ids, self.num_iou_thresholds, self.num_buckets), + ) + false_positives_update = tf.reshape( + false_positives_update.stack(), + (self.num_class_ids, self.num_iou_thresholds, self.num_buckets), + ) + + true_positive_buckets_update = ( + true_positive_buckets_update + true_positives_update + ) + false_positive_buckets_update = ( + false_positive_buckets_update + false_positives_update + ) + ground_truth_boxes_update = ( + ground_truth_boxes_update + ground_truths_update.stack() + ) + + self.ground_truths.assign_add(ground_truth_boxes_update) + self.true_positive_buckets.assign_add(true_positive_buckets_update) + self.false_positive_buckets.assign_add(false_positive_buckets_update) + + @tf.function() + def result(self): + true_positives = tf.cast(self.true_positive_buckets, self.dtype) + false_positives = tf.cast(self.false_positive_buckets, self.dtype) + ground_truths = tf.cast(self.ground_truths, self.dtype) + + true_positives_sum = tf.cumsum(true_positives, axis=-1) + false_positives_sum = tf.cumsum(false_positives, axis=-1) + + present_categories = tf.math.reduce_sum(tf.cast(ground_truths != 0, tf.int32)) + + if present_categories == 0: + return 0.0 + + recalls = tf.math.divide_no_nan( + true_positives_sum, ground_truths[:, None, None] + ) + precisions = true_positives_sum / (false_positives_sum + true_positives_sum) + + result = tf.TensorArray( + tf.float32, size=self.num_class_ids * self.num_iou_thresholds + ) + zero_pad = tf.zeros(shape=(1,), dtype=tf.float32) + # so in this case this should be: [1, 1] + for i in range(self.num_class_ids): + for j in 
range(self.num_iou_thresholds): + recalls_i = recalls[i, j] + precisions_i = precisions[i, j] + + # recall threshold=0 finds the first bucket always + # this is different from the original implementation because the + # original implementation always has at least one bounding box + # in the first bucket. + # + # as such, we need to mask out the buckets where there is at + # least one bounding box Therefore, we must filter out the + # buckets where (precisions_i) is NaN, as that implies a divide + # by zero. + + inds = tf.where(not tf.math.is_nan(precisions_i)) + recalls_i = tf.gather_nd(recalls_i, inds) + precisions_i = tf.gather_nd(precisions_i, inds) + + inds = tf.searchsorted( + recalls_i, tf.constant(self.recall_thresholds), side="left" + ) + + # if searchsorted returns len(precisions)+1, we should return 0 + precisions_i = tf.concat([precisions_i, zero_pad], axis=-1) + precision_per_recall_threshold = tf.gather(precisions_i, inds) + + result_ij = tf.math.reduce_mean(precision_per_recall_threshold, axis=-1) + result = result.write(j + i * self.num_iou_thresholds, result_ij) + + result = tf.reshape( + result.stack(), (self.num_class_ids, self.num_iou_thresholds) + ) + result = tf.math.reduce_mean(result, axis=-1) + result = tf.math.reduce_sum(result, axis=0) / tf.cast( + present_categories, tf.float32 + ) + return result diff --git a/keras_cv/metrics/coco/mean_average_precision_test.py b/keras_cv/metrics/coco/mean_average_precision_test.py new file mode 100644 index 0000000000..0cbfbb2384 --- /dev/null +++ b/keras_cv/metrics/coco/mean_average_precision_test.py @@ -0,0 +1,160 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for COCOMeanAveragePrecision.""" + +import numpy as np +import tensorflow as tf +from tensorflow import keras + +from keras_cv.metrics import COCOMeanAveragePrecision + + +class COCOMeanAveragePrecisionTest(tf.test.TestCase): + def test_runs_inside_model(self): + i = keras.layers.Input((None, None, 6)) + model = keras.Model(i, i) + + mean_average_precision = COCOMeanAveragePrecision( + max_detections=100, + num_buckets=4, + class_ids=[1], + area_range=(0, 64**2), + ) + + # These would match if they were in the area range + y_true = np.array([[[0, 0, 10, 10, 1], [5, 5, 10, 10, 1]]]).astype(np.float32) + y_pred = np.array([[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.5]]]).astype( + np.float32 + ) + + model.compile(metrics=[mean_average_precision]) + + # mean_average_precision.update_state(y_true, y_pred) + + model.evaluate(y_pred, y_true) + + self.assertAllEqual(mean_average_precision.result(), 1.0) + + def test_first_buckets_have_no_boxes(self): + mean_average_precision = COCOMeanAveragePrecision( + iou_thresholds=[0.33], + class_ids=[1], + max_detections=100, + num_buckets=4, + recall_thresholds=[0.3, 0.5], + ) + + ground_truths = [3] + # one class + true_positives = [ + [ + [ + # one threshold + # three buckets + 0, + 0, + 1, + 2, + ] + ] + ] + false_positives = [ + [ + [ + # one threshold + # three buckets + 0, + 0, + 1, + 0, + ] + ] + ] + + # so we get: + # rcs = [0, 0, 0.33, 1.0] + # prs = [NaN, NaN, 0.5 , 0.75] + # after filtering: + # rcs = [0.33, 1.0] + # prs = [0.5, 0.75] + # so for PR pairs we get: + # [0.3, 0.5] + # [0.5, 0.75] + + # So mean average precision should be: (0.5 + 0.75)/2 = 0.625. + ground_truths = tf.constant(ground_truths, tf.int32) + true_positives = tf.constant(true_positives, tf.int32) + false_positives = tf.constant(false_positives, tf.int32) + + mean_average_precision.ground_truths.assign(ground_truths) + mean_average_precision.true_positive_buckets.assign(true_positives) + mean_average_precision.false_positive_buckets.assign(false_positives) + + self.assertEqual(mean_average_precision.result(), 0.625) + + def test_result_method_with_direct_assignment_one_threshold(self): + mean_average_precision = COCOMeanAveragePrecision( + iou_thresholds=[0.33], + class_ids=[1], + max_detections=100, + num_buckets=3, + recall_thresholds=[0.3, 0.5], + ) + + ground_truths = [3] + + # one class + true_positives = [ + [ + [ + # one threshold + # three buckets + 0, + 1, + 2, + ] + ] + ] + + false_positives = [ + [ + [ + # one threshold + # three buckets + 1, + 0, + 0, + ] + ] + ] + + # so we get: + # rcs = [0, 0.33, 1.0] + # prs = [0, 0.5 , 0.75] + + # so for PR pairs we get: + # [0.3, 0.5] + # [0.5, 0.75] + + # So mean average precision should be: (0.5 + 0.75)/2 = 0.625. 
+
+        ground_truths = tf.constant(ground_truths, tf.int32)
+        true_positives = tf.constant(true_positives, tf.int32)
+        false_positives = tf.constant(false_positives, tf.int32)
+
+        mean_average_precision.ground_truths.assign(ground_truths)
+        mean_average_precision.true_positive_buckets.assign(true_positives)
+        mean_average_precision.false_positive_buckets.assign(false_positives)
+
+        self.assertEqual(mean_average_precision.result(), 0.625)
diff --git a/keras_cv/metrics/coco/numerical_tests/GenerateSamples.ipynb b/keras_cv/metrics/coco/numerical_tests/GenerateSamples.ipynb
index a1cea437b5..94bece8d5e 100644
--- a/keras_cv/metrics/coco/numerical_tests/GenerateSamples.ipynb
+++ b/keras_cv/metrics/coco/numerical_tests/GenerateSamples.ipynb
@@ -179,15 +179,15 @@
     "import random\n",
     "\n",
     "\n",
-    "def mutate_bbox(bbox):\n",
+    "def mutate_bounding_box(bounding_box):\n",
     "    def shift(x, xw):\n",
     "        return xw * random.uniform(-1, 1) + x\n",
     "\n",
     "    return [\n",
-    "        shift(bbox[0], bbox[2] / 10),\n",
-    "        shift(bbox[1], bbox[3] / 10),\n",
-    "        random.uniform(0.9, 1.1) * bbox[2],\n",
-    "        random.uniform(0.9, 1.1) * bbox[3],\n",
+    "        shift(bounding_box[0], bounding_box[2] / 10),\n",
+    "        shift(bounding_box[1], bounding_box[3] / 10),\n",
+    "        random.uniform(0.9, 1.1) * bounding_box[2],\n",
+    "        random.uniform(0.9, 1.1) * bounding_box[3],\n",
     "    ]\n",
     "\n",
     "\n",
@@ -196,7 +196,7 @@
     "    result = {\n",
     "        \"image_id\": gt[\"image_id\"],\n",
     "        \"area\": gt[\"area\"],\n",
-    "        \"bbox\": mutate_bbox(gt[\"bbox\"]),\n",
+    "        \"bbox\": mutate_bounding_box(gt[\"bbox\"]),\n",
     "        \"id\": gt[\"id\"],\n",
     "        \"category_id\": gt[\"category_id\"],\n",
     "        \"score\": random.uniform(0, 1),\n",
@@ -296,17 +296,17 @@
     "\n",
     "for annotation in ground_truths[\"annotations\"]:\n",
     "    img_id = annotation[\"image_id\"]\n",
-    "    bbox = annotation[\"bbox\"]\n",
-    "    bbox = [x for x in bbox] + [int(annotation[\"category_id\"])]\n",
-    "    groups[img_id].append(bbox)\n",
+    "    bounding_box = annotation[\"bbox\"]\n",
+    "    bounding_box = [x for x in bounding_box] + [int(annotation[\"category_id\"])]\n",
+    "    groups[img_id].append(bounding_box)\n",
     "\n",
     "imgs = sorted(groups.keys())\n",
     "\n",
     "result = []\n",
     "for img in imgs:\n",
-    "    bboxes = groups[img]\n",
-    "    bboxes = np.array(bboxes)\n",
-    "    result.append(bboxes)\n",
+    "    bounding_boxes = groups[img]\n",
+    "    bounding_boxes = np.array(bounding_boxes)\n",
+    "    result.append(bounding_boxes)\n",
     "\n",
     "m = max([r.shape[0] for r in result])\n",
     "\n",
@@ -425,17 +425,21 @@
     "\n",
     "for annotation in results:\n",
     "    img_id = annotation[\"image_id\"]\n",
-    "    bbox = annotation[\"bbox\"]\n",
-    "    bbox = [x for x in bbox] + [int(annotation[\"category_id\"])] + [annotation[\"score\"]]\n",
-    "    groups[img_id].append(bbox)\n",
+    "    bounding_box = annotation[\"bbox\"]\n",
+    "    bounding_box = (\n",
+    "        [x for x in bounding_box]\n",
+    "        + [int(annotation[\"category_id\"])]\n",
+    "        + [annotation[\"score\"]]\n",
+    "    )\n",
+    "    groups[img_id].append(bounding_box)\n",
     "\n",
     "imgs = sorted(groups.keys())\n",
     "\n",
     "result = []\n",
     "for img in imgs:\n",
-    "    bboxes = groups[img]\n",
-    "    bboxes = np.array(bboxes)\n",
-    "    result.append(bboxes)\n",
+    "    bounding_boxes = groups[img]\n",
+    "    bounding_boxes = np.array(bounding_boxes)\n",
+    "    result.append(bounding_boxes)\n",
     "\n",
     "m = max([r.shape[0] for r in result])\n",
     "\n",
diff --git a/keras_cv/metrics/coco/numerical_tests/mean_average_precision_test.py b/keras_cv/metrics/coco/numerical_tests/mean_average_precision_test.py
new file mode 100644
index 0000000000..43b7244ebc
--- /dev/null
+++ b/keras_cv/metrics/coco/numerical_tests/mean_average_precision_test.py
@@ -0,0 +1,131 @@
+# Copyright 2022 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+import numpy as np
+import tensorflow as tf
+
+from keras_cv.metrics.coco import COCOMeanAveragePrecision
+from keras_cv.utils import bounding_box
+
+SAMPLE_FILE = os.path.dirname(os.path.abspath(__file__)) + "/sample_boxes.npz"
+
+
+class MeanAveragePrecisionTest(tf.test.TestCase):
+    """Numerical testing for COCOMeanAveragePrecision.
+
+    Unit tests that test Keras COCO metric results against the known values of
+    cocoeval.py. The bounding boxes in sample_boxes.npz were given to
+    cocoeval.py, which computed the following values:
+    Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.643
+    Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
+    Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.729
+    Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.644
+    Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.633
+    Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.689
+    """
+
+    def test_mean_average_precision_correctness_default(self):
+        y_true, y_pred, categories = load_samples(SAMPLE_FILE)
+
+        # Area range all
+        mean_average_precision = COCOMeanAveragePrecision(
+            class_ids=categories + [1000], max_detections=100, num_buckets=1000
+        )
+
+        mean_average_precision.update_state(y_true, y_pred)
+        result = mean_average_precision.result().numpy()
+        self.assertAlmostEqual(result, 0.643, delta=0.06)
+
+    def test_mean_average_precision_correctness_medium(self):
+        y_true, y_pred, categories = load_samples(SAMPLE_FILE)
+
+        mean_average_precision = COCOMeanAveragePrecision(
+            class_ids=categories + [1000],
+            max_detections=100,
+            area_range=(32**2, 96**2),
+        )
+
+        mean_average_precision.update_state(y_true, y_pred)
+        result = mean_average_precision.result().numpy()
+        self.assertAlmostEqual(result, 0.633, delta=0.06)
+
+    def test_mean_average_precision_correctness_small(self):
+        y_true, y_pred, categories = load_samples(SAMPLE_FILE)
+
+        mean_average_precision = COCOMeanAveragePrecision(
+            class_ids=categories + [1000],
+            max_detections=100,
+            area_range=(0, 32**2),
+        )
+
+        mean_average_precision.update_state(y_true, y_pred)
+        result = mean_average_precision.result().numpy()
+        self.assertAlmostEqual(result, 0.644, delta=0.06)
+
+    def test_mean_average_precision_correctness_iou_05(self):
+        y_true, y_pred, categories = load_samples(SAMPLE_FILE)
+
+        mean_average_precision = COCOMeanAveragePrecision(
+            class_ids=categories + [1000],
+            iou_thresholds=[0.5],
+            max_detections=100,
+            area_range=(0, 1e5**2),
+        )
+
+        mean_average_precision.update_state(y_true, y_pred)
+        result = mean_average_precision.result().numpy()
+        self.assertAlmostEqual(result, 1.0, delta=0.06)
+
+    def test_mean_average_precision_correctness_iou_75(self):
+        y_true, y_pred, categories = load_samples(SAMPLE_FILE)
+
+        mean_average_precision = COCOMeanAveragePrecision(
+            class_ids=categories + [1000],
+            iou_thresholds=[0.75],
+            max_detections=100,
+            area_range=(0, 1e5**2),
+        )
+
+        mean_average_precision.update_state(y_true, y_pred)
+        result = mean_average_precision.result().numpy()
+        self.assertAlmostEqual(result, 0.729, delta=0.06)
+
+    # TODO(lukewood): re-enable after performance testing
+    # def test_mean_average_precision_correctness_large(self):
+    #     y_true, y_pred, categories = load_samples(SAMPLE_FILE)
+    #
+    #     mean_average_precision = COCOMeanAveragePrecision(
+    #         class_ids=categories + [1000],
+    #         max_detections=100,
+    #         area_range=(96**2, 1e5**2),
+    #     )
+    #
+    #     mean_average_precision.update_state(y_true, y_pred)
+    #     result = mean_average_precision.result().numpy()
+    #     self.assertAlmostEqual(result, 0.689, delta=0.06)
+
+
+def load_samples(fname):
+    npzfile = np.load(fname)
+    y_true = npzfile["arr_0"].astype(np.float32)
+    y_pred = npzfile["arr_1"].astype(np.float32)
+
+    y_true = bounding_box.xywh_to_corners(y_true)
+    y_pred = bounding_box.xywh_to_corners(y_pred)
+
+    categories = set(int(x) for x in y_true[:, :, 4].numpy().flatten())
+    categories = [x for x in categories if x != -1]
+
+    return y_true, y_pred, categories
diff --git a/keras_cv/metrics/coco/numerical_tests/recall_correctness_test.py b/keras_cv/metrics/coco/numerical_tests/recall_correctness_test.py
index 98806fd17f..bded419f5d 100644
--- a/keras_cv/metrics/coco/numerical_tests/recall_correctness_test.py
+++ b/keras_cv/metrics/coco/numerical_tests/recall_correctness_test.py
@@ -19,7 +19,7 @@
 import tensorflow as tf
 
 from keras_cv.metrics import COCORecall
-from keras_cv.utils import bbox
+from keras_cv.utils import bounding_box
 
 SAMPLE_FILE = os.path.dirname(os.path.abspath(__file__)) + "/sample_boxes.npz"
 
@@ -126,8 +126,8 @@ def load_samples(fname):
     y_true = npzfile["arr_0"].astype(np.float32)
     y_pred = npzfile["arr_1"].astype(np.float32)
 
-    y_true = bbox.xywh_to_corners(y_true)
-    y_pred = bbox.xywh_to_corners(y_pred)
+    y_true = bounding_box.xywh_to_corners(y_true)
+    y_pred = bounding_box.xywh_to_corners(y_pred)
 
     categories = set(int(x) for x in y_true[:, :, 4].numpy().flatten())
     categories = [x for x in categories if x != -1]
diff --git a/keras_cv/metrics/coco/recall.py b/keras_cv/metrics/coco/recall.py
index f57c1f24dc..d89b5ebb43 100644
--- a/keras_cv/metrics/coco/recall.py
+++ b/keras_cv/metrics/coco/recall.py
@@ -16,7 +16,7 @@
 import tensorflow.keras.initializers as initializers
 
 from keras_cv.metrics.coco import utils
-from keras_cv.utils import bbox
+from keras_cv.utils import bounding_box
 from keras_cv.utils import iou as iou_lib
 
 
@@ -45,13 +45,14 @@ class COCORecall(keras.metrics.Metric):
 
     COCORecall accepts two Tensors as input to its `update_state` method.
     These Tensors represent bounding boxes in `corners` format. Utilities
     to convert Tensors from `xywh` to `corners` format can be found in
-    `keras_cv.utils.bbox`.
+    `keras_cv.utils.bounding_box`.
Each image in a dataset may have a different number of bounding boxes,
     both in the ground truth dataset and the prediction set. In order to
     account for this, you may either pass a `tf.RaggedTensor`, or pad Tensors
     with `-1`s to indicate unused boxes. A utility function to perform this
-    padding is available at `keras_cv_.utils.bbox.pad_bbox_batch_to_shape`.
+    padding is available at
+    `keras_cv.utils.bounding_box.pad_bounding_box_batch_to_shape`.
 
     ```python
     coco_recall = keras_cv.metrics.COCORecall(
@@ -137,7 +138,7 @@ def update_state(self, y_true, y_pred, sample_weight=None):
         num_thresholds = tf.shape(iou_thresholds)[0]
         num_categories = tf.shape(class_ids)[0]
 
-        # Sort by bbox.CONFIDENCE to make maxDetections easy to compute.
+        # Sort by bounding_box.CONFIDENCE to make maxDetections easy to compute.
         true_positives_update = tf.zeros_like(self.true_positives)
         ground_truth_boxes_update = tf.zeros_like(self.ground_truth_boxes)
 
@@ -157,7 +158,7 @@ def update_state(self, y_true, y_pred, sample_weight=None):
             category = class_ids[k_i]
 
             category_filtered_y_pred = utils.filter_boxes(
-                y_pred_for_image, value=category, axis=bbox.CLASS
+                y_pred_for_image, value=category, axis=bounding_box.CLASS
             )
 
             detections = category_filtered_y_pred
@@ -165,7 +166,7 @@ def update_state(self, y_true, y_pred, sample_weight=None):
                 detections = category_filtered_y_pred[: self.max_detections]
 
             ground_truths = utils.filter_boxes(
-                y_true_for_image, value=category, axis=bbox.CLASS
+                y_true_for_image, value=category, axis=bounding_box.CLASS
             )
 
             ious = iou_lib.compute_ious_for_image(ground_truths, detections)
@@ -191,6 +192,7 @@ def update_state(self, y_true, y_pred, sample_weight=None):
         self.true_positives.assign_add(true_positives_update)
         self.ground_truth_boxes.assign_add(ground_truth_boxes_update)
 
+    @tf.function
     def result(self):
         present_values = self.ground_truth_boxes != 0
         n_present_categories = tf.math.reduce_sum(
diff --git a/keras_cv/metrics/coco/utils.py b/keras_cv/metrics/coco/utils.py
index 6da61d5447..b930340532 100644
--- a/keras_cv/metrics/coco/utils.py
+++ b/keras_cv/metrics/coco/utils.py
@@ -14,33 +14,33 @@
 """Contains shared utilities for Keras COCO metrics."""
 import tensorflow as tf
 
-from keras_cv.utils import bbox
+from keras_cv.utils import bounding_box
 
 
 def filter_boxes_by_area_range(boxes, min_area, max_area):
-    areas = bbox_area(boxes)
+    areas = bounding_box_area(boxes)
     inds = tf.where(tf.math.logical_and(areas >= min_area, areas < max_area))
     return tf.gather_nd(boxes, inds)
 
 
-def bbox_area(boxes):
+def bounding_box_area(boxes):
     """bounding_box_area returns the area of the provided bounding boxes.
     Args:
         boxes: Tensor of bounding boxes of shape `[..., 4+]` in corners format.
     Returns:
         areas: Tensor of areas of shape `[...]`.
     """
-    w = boxes[..., bbox.RIGHT] - boxes[..., bbox.LEFT]
-    h = boxes[..., bbox.BOTTOM] - boxes[..., bbox.TOP]
+    w = boxes[..., bounding_box.RIGHT] - boxes[..., bounding_box.LEFT]
+    h = boxes[..., bounding_box.BOTTOM] - boxes[..., bounding_box.TOP]
     return tf.math.multiply(w, h)
 
 
 def filter_boxes(boxes, value, axis=4):
     """filter_boxes is used to select only boxes matching a given class.
     The most common use case for this is to filter to accept only a specific
-    bbox.CLASS.
+    bounding_box.CLASS.
Args:
-        boxes: Tensor of bounding boxes in format `[images, bboxes, 6]`
+        boxes: Tensor of bounding boxes in format `[images, bounding_boxes, 6]`
         value: Value the specified axis must match
         axis: Integer identifying the axis on which to filter, default 4
     Returns:
@@ -49,10 +49,10 @@
     return tf.gather_nd(boxes, tf.where(boxes[:, axis] == value))
 
 
-def to_sentinel_padded_bbox_tensor(box_sets):
-    """pad_with_sentinels returns a Tensor of bboxes padded with -1s
-    to ensure that each bbox set has identical dimensions. This is to
-    be used before passing bbox predictions, or bbox ground truths to
+def to_sentinel_padded_bounding_box_tensor(box_sets):
+    """to_sentinel_padded_bounding_box_tensor returns a Tensor of bounding_boxes
+    padded with -1s to ensure that each bounding_box set has identical dimensions.
+    This is to be used before passing bounding_box predictions or ground truths to
     the keras COCO metrics.
     Args:
         box_sets: List of Tensors representing bounding boxes, or a list of lists of
@@ -65,22 +65,23 @@
 def filter_out_sentinels(boxes):
     """filter_out_sentinels filters out boxes that were padded on to the prediction
-    or ground truth bbox tensor to ensure dimensions match.
+    or ground truth bounding_box tensor to ensure dimensions match.
     Args:
-        boxes: Tensor of bounding boxes in format `[bboxes, 6]`, usually from a
+        boxes: Tensor of bounding boxes in format `[bounding_boxes, 6]`, usually from a
             single image.
     Returns:
         boxes: A new Tensor of bounding boxes, where boxes[axis]!=-1.
     """
-    return tf.gather_nd(boxes, tf.where(boxes[:, bbox.CLASS] != -1))
+    return tf.gather_nd(boxes, tf.where(boxes[:, bounding_box.CLASS] != -1))
 
 
-def sort_bboxes(boxes, axis=5):
-    """sort_bboxes is used to sort a list of bounding boxes by a given axis.
-    The most common use case for this is to sort by bbox.CONFIDENCE, as this is a
-    part of computing both COCORecall and COCOMeanAveragePrecision.
+def sort_bounding_boxes(boxes, axis=5):
+    """sort_bounding_boxes is used to sort a list of bounding boxes by a given axis.
+
+    The most common use case for this is to sort by bounding_box.CONFIDENCE, as this is
+    a part of computing both COCORecall and COCOMeanAveragePrecision.
     Args:
-        boxes: Tensor of bounding boxes in format `[images, bboxes, 6]`
+        boxes: Tensor of bounding boxes in format `[images, bounding_boxes, 6]`
         axis: Integer identifying the axis on which to sort, default 5
     Returns:
         boxes: A new Tensor of bounding boxes, sorted on an image-wise basis.
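A short usage sketch of how the sentinel utilities above fit together, assuming the module layout introduced in this patch and eager execution (the box values are illustrative only):

```python
import tensorflow as tf

from keras_cv.metrics.coco import utils

# Two images with different box counts; each box is
# [left, top, right, bottom, class, confidence].
boxes_image_1 = tf.constant(
    [[0.0, 0.0, 10.0, 10.0, 1.0, 0.9], [2.0, 2.0, 8.0, 8.0, 1.0, 0.4]]
)
boxes_image_2 = tf.constant([[1.0, 1.0, 5.0, 5.0, 2.0, 0.7]])

# Pad to one dense tensor; rows of -1s are sentinels the COCO metrics ignore.
padded = utils.to_sentinel_padded_bounding_box_tensor(
    [boxes_image_1, boxes_image_2]
)

# Per-image processing can then drop the sentinels again.
recovered = utils.filter_out_sentinels(padded[1])  # equals boxes_image_2
```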
diff --git a/keras_cv/metrics/coco/utils_test.py b/keras_cv/metrics/coco/utils_test.py index d0c8cc94f6..fc6b7131c1 100644 --- a/keras_cv/metrics/coco/utils_test.py +++ b/keras_cv/metrics/coco/utils_test.py @@ -16,86 +16,94 @@ import tensorflow as tf from keras_cv.metrics.coco import utils -from keras_cv.utils import bbox +from keras_cv.utils import bounding_box from keras_cv.utils import iou as iou_lib class UtilTest(tf.test.TestCase): - def test_filter_bboxes_empty(self): - # set of bboxes - y_pred = tf.stack([_dummy_bbox(category=1)]) - result = utils.filter_boxes(y_pred, 2, axis=bbox.CLASS) + def test_filter_bounding_boxes_empty(self): + # set of bounding_boxes + y_pred = tf.stack([_dummy_bounding_box(category=1)]) + result = utils.filter_boxes(y_pred, 2, axis=bounding_box.CLASS) self.assertEqual(result.shape[0], 0) - def test_bbox_area(self): + def test_bounding_box_area(self): boxes = tf.constant([[0, 0, 100, 100]], dtype=tf.float32) - areas = utils.bbox_area(boxes) + areas = utils.bounding_box_area(boxes) self.assertAllClose(areas, tf.constant((10000.0,))) - def test_filter_bboxes(self): - # set of bboxes - y_pred = tf.stack([_dummy_bbox(category=1), _dummy_bbox(category=2)]) - result = utils.filter_boxes(y_pred, 2, axis=bbox.CLASS) + def test_filter_bounding_boxes(self): + # set of bounding_boxes + y_pred = tf.stack( + [_dummy_bounding_box(category=1), _dummy_bounding_box(category=2)] + ) + result = utils.filter_boxes(y_pred, 2, axis=bounding_box.CLASS) - self.assertAllClose(result, tf.stack([_dummy_bbox(category=2)])) + self.assertAllClose(result, tf.stack([_dummy_bounding_box(category=2)])) - def test_to_sentinel_padded_bbox_tensor(self): - box_set1 = tf.stack([_dummy_bbox(), _dummy_bbox()]) - box_set2 = tf.stack([_dummy_bbox()]) + def test_to_sentinel_padded_bounding_box_tensor(self): + box_set1 = tf.stack([_dummy_bounding_box(), _dummy_bounding_box()]) + box_set2 = tf.stack([_dummy_bounding_box()]) boxes = [box_set1, box_set2] - bbox_tensor = utils.to_sentinel_padded_bbox_tensor(boxes) + bounding_box_tensor = utils.to_sentinel_padded_bounding_box_tensor(boxes) self.assertAllClose( - bbox_tensor[1, 1], + bounding_box_tensor[1, 1], -tf.ones( 6, ), ) def test_filter_out_sentinels(self): - # set of bboxes - y_pred = tf.stack([_dummy_bbox(category=1), _dummy_bbox(category=-1)]) + # set of bounding_boxes + y_pred = tf.stack( + [_dummy_bounding_box(category=1), _dummy_bounding_box(category=-1)] + ) result = utils.filter_out_sentinels(y_pred) - self.assertAllClose(result, tf.stack([_dummy_bbox(category=1)])) + self.assertAllClose(result, tf.stack([_dummy_bounding_box(category=1)])) def test_end_to_end_sentinel_filtering(self): - box_set1 = tf.stack([_dummy_bbox(), _dummy_bbox()]) - box_set2 = tf.stack([_dummy_bbox()]) + box_set1 = tf.stack([_dummy_bounding_box(), _dummy_bounding_box()]) + box_set2 = tf.stack([_dummy_bounding_box()]) boxes = [box_set1, box_set2] - bbox_tensor = utils.to_sentinel_padded_bbox_tensor(boxes) + bounding_box_tensor = utils.to_sentinel_padded_bounding_box_tensor(boxes) - self.assertAllClose(utils.filter_out_sentinels(bbox_tensor[0]), box_set1) - self.assertAllClose(utils.filter_out_sentinels(bbox_tensor[1]), box_set2) + self.assertAllClose( + utils.filter_out_sentinels(bounding_box_tensor[0]), box_set1 + ) + self.assertAllClose( + utils.filter_out_sentinels(bounding_box_tensor[1]), box_set2 + ) def test_match_boxes(self): y_pred = tf.stack( [ - _dummy_bbox(0.1), - _dummy_bbox(0.9), - _dummy_bbox(0.4), + _dummy_bounding_box(0.1), + _dummy_bounding_box(0.9), + 
_dummy_bounding_box(0.4), ] ) y_true = tf.stack( [ - _dummy_bbox(0.1), - _dummy_bbox(0.9), - _dummy_bbox(0.4), - _dummy_bbox(0.2), + _dummy_bounding_box(0.1), + _dummy_bounding_box(0.9), + _dummy_bounding_box(0.4), + _dummy_bounding_box(0.2), ] ) ious = iou_lib.compute_ious_for_image(y_true, y_pred) self.assertEqual(utils.match_boxes(ious, 0.5).shape, [3]) - def test_sort_bboxes_unsorted_list(self): + def test_sort_bounding_boxes_unsorted_list(self): y_pred = tf.expand_dims( tf.stack( [ - _dummy_bbox(0.1), - _dummy_bbox(0.9), - _dummy_bbox(0.4), - _dummy_bbox(0.2), + _dummy_bounding_box(0.1), + _dummy_bounding_box(0.9), + _dummy_bounding_box(0.4), + _dummy_bounding_box(0.2), ] ), axis=0, @@ -103,23 +111,23 @@ def test_sort_bboxes_unsorted_list(self): want = tf.expand_dims( tf.stack( [ - _dummy_bbox(0.9), - _dummy_bbox(0.4), - _dummy_bbox(0.2), - _dummy_bbox(0.1), + _dummy_bounding_box(0.9), + _dummy_bounding_box(0.4), + _dummy_bounding_box(0.2), + _dummy_bounding_box(0.1), ] ), axis=0, ) - y_sorted = utils.sort_bboxes(y_pred, bbox.CONFIDENCE) + y_sorted = utils.sort_bounding_boxes(y_pred, bounding_box.CONFIDENCE) self.assertAllClose(y_sorted, want) - def test_sort_bboxes_empty_list(self): + def test_sort_bounding_boxes_empty_list(self): y_pred = tf.stack([]) - y_sorted = utils.sort_bboxes(y_pred) + y_sorted = utils.sort_bounding_boxes(y_pred) self.assertAllClose(y_pred, y_sorted) -def _dummy_bbox(confidence=0.0, category=0): - """returns a bbox dummy with all 0 values, except for confidence.""" +def _dummy_bounding_box(confidence=0.0, category=0): + """returns a bounding_box dummy with all 0 values, except for confidence.""" return tf.constant([0, 0, 0, 0, category, confidence]) diff --git a/keras_cv/utils/__init__.py b/keras_cv/utils/__init__.py index acf2415e12..1536ac35db 100644 --- a/keras_cv/utils/__init__.py +++ b/keras_cv/utils/__init__.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from keras_cv.utils import bbox +from keras_cv.utils import bounding_box diff --git a/keras_cv/utils/bbox_test.py b/keras_cv/utils/bbox_test.py deleted file mode 100644 index e0e26fa033..0000000000 --- a/keras_cv/utils/bbox_test.py +++ /dev/null @@ -1,94 +0,0 @@ -# Copyright 2022 The KerasCV Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import tensorflow as tf - -from keras_cv.utils import bbox - - -class BBOXTestCase(tf.test.TestCase): - def setUp(self): - super().setUp() - self.corner_bbox = tf.constant( - [[10, 10, 110, 110], [20, 20, 120, 120]], dtype=tf.float32 - ) - self.xywh_bbox = tf.constant( - [[60, 60, 100, 100], [70, 70, 100, 100]], dtype=tf.float32 - ) - - def test_corner_to_xywh(self): - self.assertAllClose(bbox.corners_to_xywh(self.corner_bbox), self.xywh_bbox) - - # Make sure it also accept higher rank than 2 - corner_bbox_3d = tf.expand_dims(self.corner_bbox, 0) - xywh_bbox_3d = tf.expand_dims(self.xywh_bbox, 0) - self.assertAllClose(bbox.corners_to_xywh(corner_bbox_3d), xywh_bbox_3d) - - # Make sure it also accept more value after last index. - padded_corner_bbox = tf.pad( - self.corner_bbox, [[0, 0], [0, 2]] - ) # Right pad 2 more value - padded_xywh_bbox = tf.pad(self.xywh_bbox, [[0, 0], [0, 2]]) - self.assertAllClose(bbox.corners_to_xywh(padded_corner_bbox), padded_xywh_bbox) - - # Same for higher rank - padded_corner_bbox_3d = tf.expand_dims(padded_corner_bbox, 0) - padded_xywh_bbox_3d = tf.expand_dims(padded_xywh_bbox, 0) - self.assertAllClose( - bbox.corners_to_xywh(padded_corner_bbox_3d), padded_xywh_bbox_3d - ) - - def test_xywh_to_corner(self): - self.assertAllClose(bbox.xywh_to_corners(self.xywh_bbox), self.corner_bbox) - - # Make sure it also accept higher rank than 2 - corner_bbox_3d = tf.expand_dims(self.corner_bbox, 0) - xywh_bbox_3d = tf.expand_dims(self.xywh_bbox, 0) - self.assertAllClose(bbox.xywh_to_corners(xywh_bbox_3d), corner_bbox_3d) - - # Make sure it also accept more value after last index. - padded_corner_bbox = tf.pad( - self.corner_bbox, [[0, 0], [0, 2]] - ) # Right pad 2 more value - padded_xywh_bbox = tf.pad(self.xywh_bbox, [[0, 0], [0, 2]]) - self.assertAllClose(bbox.xywh_to_corners(padded_xywh_bbox), padded_corner_bbox) - - # Same for higher rank - padded_corner_bbox_3d = tf.expand_dims(padded_corner_bbox, 0) - padded_xywh_bbox_3d = tf.expand_dims(padded_xywh_bbox, 0) - self.assertAllClose( - bbox.xywh_to_corners(padded_xywh_bbox_3d), padded_corner_bbox_3d - ) - - def test_bbox_padding(self): - bboxes = [[1, 2, 3, 4], [5, 6, 7, 8]] - target_shape = [3, 4] - result = bbox.pad_bbox_batch_to_shape(bboxes, target_shape) - self.assertAllClose(result, [[1, 2, 3, 4], [5, 6, 7, 8], [-1, -1, -1, -1]]) - - target_shape = [2, 5] - result = bbox.pad_bbox_batch_to_shape(bboxes, target_shape) - self.assertAllClose(result, [[1, 2, 3, 4, -1], [5, 6, 7, 8, -1]]) - - # Make sure to raise error if the rank is different between bbox and target - # shape - with self.assertRaisesRegex(ValueError, "Target shape should have same rank"): - bbox.pad_bbox_batch_to_shape(bboxes, [1, 2, 3]) - - # Make sure raise error if the target shape is smaller - target_shape = [3, 2] - with self.assertRaisesRegex( - ValueError, "Target shape should be larger than bounding box shape" - ): - bbox.pad_bbox_batch_to_shape(bboxes, target_shape) diff --git a/keras_cv/utils/bbox.py b/keras_cv/utils/bounding_box.py similarity index 65% rename from keras_cv/utils/bbox.py rename to keras_cv/utils/bounding_box.py index eb18d8302a..2cb0e8c129 100644 --- a/keras_cv/utils/bbox.py +++ b/keras_cv/utils/bounding_box.py @@ -52,16 +52,16 @@ CONFIDENCE = 5 -def corners_to_xywh(bboxes): - """Converts bboxes in corners format to XYWH format. +def corners_to_xywh(bounding_boxes): + """Converts bounding_boxes in corners format to XYWH format. 
Args: - bboxes: a Tensor which has at least 2D rank, with shape [..., 4] + bounding_boxes: a Tensor which has at least 2D rank, with shape [..., 4] Returns: - converted bboxes with same shape, but in XYWH format. + converted bounding_boxes with same shape, but in XYWH format. """ - left, top, right, bottom, rest = tf.split(bboxes, [1, 1, 1, 1, -1], axis=-1) + left, top, right, bottom, rest = tf.split(bounding_boxes, [1, 1, 1, 1, -1], axis=-1) return tf.concat( [ # We use ... here in case user has higher rank of inputs. @@ -75,16 +75,16 @@ def corners_to_xywh(bboxes): ) -def xywh_to_corners(bboxes): - """Converts bboxes in XYWH format to corners format. +def xywh_to_corners(bounding_boxes): + """Converts bounding_boxes in XYWH format to corners format. Args: - bboxes: a Tensor which has at least 2D rank, with shape [..., 4] + bounding_boxes: a Tensor which has at least 2D rank, with shape [..., 4] Returns: - converted bboxes with same shape, but in corners format. + converted bounding_boxes with same shape, but in corners format. """ - x, y, width, height, rest = tf.split(bboxes, [1, 1, 1, 1, -1], axis=-1) + x, y, width, height, rest = tf.split(bounding_boxes, [1, 1, 1, 1, -1], axis=-1) return tf.concat( [ x - width / 2.0, @@ -97,50 +97,55 @@ def xywh_to_corners(bboxes): ) -def pad_bbox_batch_to_shape(bboxes, target_shape, padding_values=-1): +def pad_bounding_box_batch_to_shape(bounding_boxes, target_shape, padding_values=-1): """Pads a list of bounding boxes with -1s. Boxes represented by all -1s are ignored by COCO metrics. Sample usage: - bbox = [[1, 2, 3, 4], [5, 6, 7, 8]] # 2 bboxes with with xywh or corner format. - target_shape = [3, 4] # Add 1 more dummy bbox - result = pad_bbox_batch_to_shape(bbox, target_shape) + bounding_box = [[1, 2, 3, 4], [5, 6, 7, 8]] # 2 bounding_boxes with with xywh or + corners format. + target_shape = [3, 4] # Add 1 more dummy bounding_box + result = pad_bounding_box_batch_to_shape(bounding_box, target_shape) # result == [[1, 2, 3, 4], [5, 6, 7, 8], [-1, -1, -1, -1]] target_shape = [2, 5] # Add 1 more index after the current 4 coordinates. - result = pad_bbox_batch_to_shape(bbox, target_shape) + result = pad_bounding_box_batch_to_shape(bounding_box, target_shape) # result == [[1, 2, 3, 4, -1], [5, 6, 7, 8, -1]] Args: - bboxes: tf.Tensor of bounding boxes in any format. + bounding_boxes: tf.Tensor of bounding boxes in any format. target_shape: Target shape to pad bounding box to. This should have the same - rank as the bbboxs. Note that if the target_shape contains any dimension - that is smaller than the bounding box shape, then no value will be padded + rank as the bbounding_boxs. Note that if the target_shape contains any + dimension that is smaller than the bounding box shape, then no value will be + padded. padding_values: value to pad, defaults to -1 to mask out in coco metrics. Returns: - bboxes padded to target shape. + bounding_boxes padded to target shape. Raises: ValueError, when target shape has smaller rank or dimension value when comparing with shape of bounding boxes. """ - bbox_shape = tf.shape(bboxes) - if len(bbox_shape) != len(target_shape): + bounding_box_shape = tf.shape(bounding_boxes) + if len(bounding_box_shape) != len(target_shape): raise ValueError( "Target shape should have same rank as the bounding box. 
" - f"Got bbox shape = {bbox_shape}, " + f"Got bounding_box shape = {bounding_box_shape}, " f"target_shape = {target_shape}" ) for dim in range(len(target_shape)): - if bbox_shape[dim] > target_shape[dim]: + if bounding_box_shape[dim] > target_shape[dim]: raise ValueError( "Target shape should be larger than bounding box shape " "in all dimensions. " - f"Got bbox shape = {bbox_shape}, " + f"Got bounding_box shape = {bounding_box_shape}, " f"target_shape = {target_shape}" ) paddings = [ - [0, target_shape[dim] - bbox_shape[dim]] for dim in range(len(target_shape)) + [0, target_shape[dim] - bounding_box_shape[dim]] + for dim in range(len(target_shape)) ] - return tf.pad(bboxes, paddings, mode="CONSTANT", constant_values=padding_values) + return tf.pad( + bounding_boxes, paddings, mode="CONSTANT", constant_values=padding_values + ) diff --git a/keras_cv/utils/bounding_box_test.py b/keras_cv/utils/bounding_box_test.py new file mode 100644 index 0000000000..9b5142dbad --- /dev/null +++ b/keras_cv/utils/bounding_box_test.py @@ -0,0 +1,116 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tensorflow as tf + +from keras_cv.utils import bounding_box + + +class BBOXTestCase(tf.test.TestCase): + def setUp(self): + super().setUp() + self.corner_bounding_box = tf.constant( + [[10, 10, 110, 110], [20, 20, 120, 120]], dtype=tf.float32 + ) + self.xywh_bounding_box = tf.constant( + [[60, 60, 100, 100], [70, 70, 100, 100]], dtype=tf.float32 + ) + + def test_corner_to_xywh(self): + self.assertAllClose( + bounding_box.corners_to_xywh(self.corner_bounding_box), + self.xywh_bounding_box, + ) + + # Make sure it also accept higher rank than 2 + corner_bounding_box_3d = tf.expand_dims(self.corner_bounding_box, 0) + xywh_bounding_box_3d = tf.expand_dims(self.xywh_bounding_box, 0) + self.assertAllClose( + bounding_box.corners_to_xywh(corner_bounding_box_3d), xywh_bounding_box_3d + ) + + # Make sure it also accept more value after last index. 
+ padded_corner_bounding_box = tf.pad( + self.corner_bounding_box, [[0, 0], [0, 2]] + ) # Right pad 2 more value + padded_xywh_bounding_box = tf.pad(self.xywh_bounding_box, [[0, 0], [0, 2]]) + self.assertAllClose( + bounding_box.corners_to_xywh(padded_corner_bounding_box), + padded_xywh_bounding_box, + ) + + # Same for higher rank + padded_corner_bounding_box_3d = tf.expand_dims(padded_corner_bounding_box, 0) + padded_xywh_bounding_box_3d = tf.expand_dims(padded_xywh_bounding_box, 0) + self.assertAllClose( + bounding_box.corners_to_xywh(padded_corner_bounding_box_3d), + padded_xywh_bounding_box_3d, + ) + + def test_xywh_to_corner(self): + self.assertAllClose( + bounding_box.xywh_to_corners(self.xywh_bounding_box), + self.corner_bounding_box, + ) + + # Make sure it also accept higher rank than 2 + corner_bounding_box_3d = tf.expand_dims(self.corner_bounding_box, 0) + xywh_bounding_box_3d = tf.expand_dims(self.xywh_bounding_box, 0) + self.assertAllClose( + bounding_box.xywh_to_corners(xywh_bounding_box_3d), corner_bounding_box_3d + ) + + # Make sure it also accept more value after last index. + padded_corner_bounding_box = tf.pad( + self.corner_bounding_box, [[0, 0], [0, 2]] + ) # Right pad 2 more value + padded_xywh_bounding_box = tf.pad(self.xywh_bounding_box, [[0, 0], [0, 2]]) + self.assertAllClose( + bounding_box.xywh_to_corners(padded_xywh_bounding_box), + padded_corner_bounding_box, + ) + + # Same for higher rank + padded_corner_bounding_box_3d = tf.expand_dims(padded_corner_bounding_box, 0) + padded_xywh_bounding_box_3d = tf.expand_dims(padded_xywh_bounding_box, 0) + self.assertAllClose( + bounding_box.xywh_to_corners(padded_xywh_bounding_box_3d), + padded_corner_bounding_box_3d, + ) + + def test_bounding_box_padding(self): + bounding_boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] + target_shape = [3, 4] + result = bounding_box.pad_bounding_box_batch_to_shape( + bounding_boxes, target_shape + ) + self.assertAllClose(result, [[1, 2, 3, 4], [5, 6, 7, 8], [-1, -1, -1, -1]]) + + target_shape = [2, 5] + result = bounding_box.pad_bounding_box_batch_to_shape( + bounding_boxes, target_shape + ) + self.assertAllClose(result, [[1, 2, 3, 4, -1], [5, 6, 7, 8, -1]]) + + # Make sure to raise error if the rank is different between bounding_box and + # target shape + with self.assertRaisesRegex(ValueError, "Target shape should have same rank"): + bounding_box.pad_bounding_box_batch_to_shape(bounding_boxes, [1, 2, 3]) + + # Make sure raise error if the target shape is smaller + target_shape = [3, 2] + with self.assertRaisesRegex( + ValueError, "Target shape should be larger than bounding box shape" + ): + bounding_box.pad_bounding_box_batch_to_shape(bounding_boxes, target_shape) diff --git a/keras_cv/utils/fill_utils.py b/keras_cv/utils/fill_utils.py index 1d46ff6288..acd5894053 100644 --- a/keras_cv/utils/fill_utils.py +++ b/keras_cv/utils/fill_utils.py @@ -13,7 +13,7 @@ # limitations under the License. 
import tensorflow as tf -from keras_cv.utils import bbox +from keras_cv.utils import bounding_box def _axis_mask(axis_lengths, offsets, mask_len): @@ -133,7 +133,7 @@ def fill_rectangle(images, centers_x, centers_y, widths, heights, fill_values): xywh = tf.stack([centers_x, centers_y, widths, heights], axis=1) xywh = tf.cast(xywh, tf.float32) - corners = bbox.xywh_to_corners(xywh) + corners = bounding_box.xywh_to_corners(xywh) mask_shape = (images_width, images_height) is_rectangle = rectangle_masks(corners, mask_shape) diff --git a/keras_cv/utils/iou.py b/keras_cv/utils/iou.py index 99461cde86..a37225386e 100644 --- a/keras_cv/utils/iou.py +++ b/keras_cv/utils/iou.py @@ -21,9 +21,10 @@ def compute_ious_for_image(boxes1, boxes2): The lookup vector is to be indexed by [`boxes1_index`,`boxes2_index`]. Bounding boxes are expected to be in the corners format of - `[bbox.LEFT, bbox.RIGHT, bbox.TOP, bbox.BOTTOM]`. For example, the bounding box - with it's left side at 100, bbox.RIGHT side at 200, bbox.TOP at 101, and - bbox.BOTTOM at 201 would be represented as: + `[bounding_box.LEFT, bounding_box.RIGHT, bounding_box.TOP, bounding_box.BOTTOM]`. + For example, the bounding box with it's left side at 100, bounding_box.RIGHT side at + 200, bounding_box.TOP at 101, and bounding_box.BOTTOM at 201 would be represented + as: > [100, 200, 101, 201] Args: From 357717b01ac2a7839807e8c01d84c722dd079624 Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Sun, 27 Feb 2022 03:29:35 +0100 Subject: [PATCH 31/43] optimize bounding box mask generation --- keras_cv/layers/preprocessing/grid_mask.py | 2 +- keras_cv/utils/fill_utils.py | 109 +++++---------------- 2 files changed, 27 insertions(+), 84 deletions(-) diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index 41140a6f70..15dcb1b14f 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -207,7 +207,7 @@ def _compute_grid_masks(self, input_shape): # make mask for each rectangle mask_side_len = tf.cast(mask_side_len, tf.int32) - masks = fill_utils.rectangle_masks(corners, (mask_side_len, mask_side_len)) + masks = fill_utils.corners_to_mask(corners, (mask_side_len, mask_side_len)) # reshape masks into shape # (batch_size, rectangles_per_image, mask_height, mask_width) diff --git a/keras_cv/utils/fill_utils.py b/keras_cv/utils/fill_utils.py index acd5894053..77ab63f372 100644 --- a/keras_cv/utils/fill_utils.py +++ b/keras_cv/utils/fill_utils.py @@ -16,101 +16,44 @@ from keras_cv.utils import bounding_box -def _axis_mask(axis_lengths, offsets, mask_len): - axis_mask = tf.sequence_mask(axis_lengths, mask_len) - rev_lengths = tf.minimum(offsets + axis_lengths, mask_len) - axis_mask = tf.reverse_sequence(axis_mask, rev_lengths, seq_axis=1) - return axis_mask - - -def xywh_to_mask(xywh, mask_shape): - width, height = mask_shape - cx = xywh[:, 0] - cy = xywh[:, 1] - w = xywh[:, 2] - h = xywh[:, 3] - x0 = cx - (w / 2) - y0 = cy - (h / 2) - - w = tf.cast(w, tf.int32) - h = tf.cast(h, tf.int32) - x0 = tf.cast(x0, tf.int32) - y0 = tf.cast(y0, tf.int32) - w_mask = _axis_mask(w, x0, width) - h_mask = _axis_mask(h, y0, height) - - w_mask = tf.expand_dims(w_mask, axis=-2) - h_mask = tf.expand_dims(h_mask, axis=-1) - masks = tf.logical_and(w_mask, h_mask) +def _axis_mask(starts, ends, mask_len): + # index range of axis + batch_size = tf.shape(starts)[0] + axis_indices = tf.range(mask_len, dtype=starts.dtype) + axis_indices = tf.expand_dims(axis_indices, 0) + axis_indices 
= tf.tile(axis_indices, [batch_size, 1]) - return masks + # mask of index bounds + above_eq_starts = tf.greater_equal(axis_indices, tf.expand_dims(starts, 1)) + less_ends = tf.less(axis_indices, tf.expand_dims(ends, 1)) + return above_eq_starts & less_ends -def corners_to_mask(corners, mask_shape): - width, height = mask_shape - x0 = corners[:, 0] - y0 = corners[:, 1] - x1 = corners[:, 2] - y1 = corners[:, 3] - w = x1 - x0 - h = y1 - y0 - - w = tf.cast(w, tf.int32) - h = tf.cast(h, tf.int32) - x0 = tf.cast(x0, tf.int32) - y0 = tf.cast(y0, tf.int32) - w_mask = _axis_mask(w, x0, width) - h_mask = _axis_mask(h, y0, height) - - w_mask = tf.expand_dims(w_mask, axis=-2) - h_mask = tf.expand_dims(h_mask, axis=-1) - masks = tf.logical_and(w_mask, h_mask) - - return masks - - -def rectangle_masks(corners, mask_shape): - """Computes masks of rectangles +def corners_to_mask(bounding_boxes, mask_shape): + """Converts bounding boxes in corners format to boolean masks Args: - corners: tensor of rectangle coordinates with shape (batch_size, 4) in + bounding_boxes: tensor of rectangle coordinates with shape (batch_size, 4) in corners format (x0, y0, x1, y1). mask_shape: a shape tuple as (width, height) indicating the output width and height of masks. Returns: boolean masks with shape (batch_size, width, height) where True values - indicate positions within rectangle coordinates. + indicate positions within bounding box coordinates. """ - # add broadcasting axes - corners = corners[..., tf.newaxis, tf.newaxis] - - # split coordinates - x0 = corners[:, 0] - y0 = corners[:, 1] - x1 = corners[:, 2] - y1 = corners[:, 3] - - # repeat height and width - width, height = mask_shape - x0_rep = tf.repeat(x0, height, axis=1) - y0_rep = tf.repeat(y0, width, axis=2) - x1_rep = tf.repeat(x1, height, axis=1) - y1_rep = tf.repeat(y1, width, axis=2) - - # range grid - batch_size = tf.shape(corners)[0] - range_row = tf.range(0, height, dtype=corners.dtype) - range_col = tf.range(0, width, dtype=corners.dtype) - range_row = tf.repeat(range_row[tf.newaxis, :, tf.newaxis], batch_size, 0) - range_col = tf.repeat(range_col[tf.newaxis, tf.newaxis, :], batch_size, 0) - - # boolean masks - masks = tf.less_equal(x0_rep, range_col) - masks = masks & tf.less_equal(y0_rep, range_row) - masks = masks & tf.less(range_col, x1_rep) - masks = masks & tf.less(range_row, y1_rep) + mask_width, mask_height = mask_shape + x0 = bounding_boxes[:, 0] + y0 = bounding_boxes[:, 1] + x1 = bounding_boxes[:, 2] + y1 = bounding_boxes[:, 3] + w_mask = _axis_mask(x0, x1, mask_width) + h_mask = _axis_mask(y0, y1, mask_height) + + w_mask = tf.expand_dims(w_mask, axis=1) + h_mask = tf.expand_dims(h_mask, axis=2) + masks = tf.logical_and(w_mask, h_mask) return masks @@ -136,7 +79,7 @@ def fill_rectangle(images, centers_x, centers_y, widths, heights, fill_values): corners = bounding_box.xywh_to_corners(xywh) mask_shape = (images_width, images_height) - is_rectangle = rectangle_masks(corners, mask_shape) + is_rectangle = corners_to_mask(corners, mask_shape) is_rectangle = tf.expand_dims(is_rectangle, -1) images = tf.where(is_rectangle, fill_values, images) From ed5161e632560a461c4d0e970424e1d8d807c904 Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Sun, 27 Feb 2022 03:29:49 +0100 Subject: [PATCH 32/43] add tests --- keras_cv/utils/fill_utils_test.py | 162 ++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) diff --git a/keras_cv/utils/fill_utils_test.py b/keras_cv/utils/fill_utils_test.py index e8034eaf20..0d48d0f7be 100644 --- 
a/keras_cv/utils/fill_utils_test.py +++ b/keras_cv/utils/fill_utils_test.py @@ -16,6 +16,168 @@ from keras_cv.utils import fill_utils +class BoundingBoxToMaskTest(tf.test.TestCase): + def _run_test(self, corners, expected): + mask = fill_utils.corners_to_mask(corners, mask_shape=(6, 6)) + mask = tf.cast(mask, dtype=tf.int32) + tf.assert_equal(mask, expected) + + def test_corners_whole(self): + expected = tf.constant( + [ + [0, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ], + dtype=tf.int32, + ) + corners = tf.constant([[1, 0, 4, 3]], dtype=tf.float32) + self._run_test(corners, expected) + + def test_corners_frac(self): + expected = tf.constant( + [ + [0, 0, 0, 0, 0, 0], + [0, 0, 1, 1, 1, 0], + [0, 0, 1, 1, 1, 0], + [0, 0, 1, 1, 1, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ] + ) + corners = tf.constant([[1.5, 0.5, 4.5, 3.5]], dtype=tf.float32) + self._run_test(corners, expected) + + def test_width_zero(self): + expected = tf.constant( + [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ] + ) + corners = tf.constant([[0, 0, 0, 3]], dtype=tf.float32) + self._run_test(corners, expected) + + def test_height_zero(self): + expected = tf.constant( + [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ] + ) + corners = tf.constant([[1, 0, 4, 0]], dtype=tf.float32) + self._run_test(corners, expected) + + def test_width_negative(self): + expected = tf.constant( + [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ] + ) + corners = tf.constant([[1, 0, -2, 3]], dtype=tf.float32) + self._run_test(corners, expected) + + def test_height_negative(self): + expected = tf.constant( + [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ] + ) + corners = tf.constant([[1, 0, 4, -2]], dtype=tf.float32) + self._run_test(corners, expected) + + def test_width_out_of_lower_bound(self): + expected = tf.constant( + [ + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ] + ) + corners = tf.constant([[-2, -2, 2, 3]], dtype=tf.float32) + self._run_test(corners, expected) + + def test_width_out_of_upper_bound(self): + expected = tf.constant( + [ + [0, 0, 0, 0, 1, 1], + [0, 0, 0, 0, 1, 1], + [0, 0, 0, 0, 1, 1], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ] + ) + corners = tf.constant([[4, 0, 8, 3]], dtype=tf.float32) + self._run_test(corners, expected) + + def test_height_out_of_lower_bound(self): + expected = tf.constant( + [ + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [1, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ] + ) + corners = tf.constant([[-2, -2, 2, 3]], dtype=tf.float32) + self._run_test(corners, expected) + + def test_height_out_of_upper_bound(self): + expected = tf.constant( + [ + [0, 0, 0, 0, 1, 1], + [0, 0, 0, 0, 1, 1], + [0, 0, 0, 0, 1, 1], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ] + ) + corners = tf.constant([[4, 0, 8, 3]], dtype=tf.float32) + self._run_test(corners, expected) + + def test_start_out_of_upper_bound(self): + expected = tf.constant( + [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 
0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + ] + ) + corners = tf.constant([[8, 8, 10, 12]], dtype=tf.float32) + self._run_test(corners, expected) + + class FillRectangleTest(tf.test.TestCase): def _run_test(self, img_w, img_h, cent_x, cent_y, rec_w, rec_h, expected): batch_size = 1 From 994242a079d002ca3c0d7d422b594f19537d8eb5 Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Mon, 28 Feb 2022 18:37:03 +0100 Subject: [PATCH 33/43] minor refactor --- keras_cv/utils/fill_utils.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/keras_cv/utils/fill_utils.py b/keras_cv/utils/fill_utils.py index 77ab63f372..50b820d5bf 100644 --- a/keras_cv/utils/fill_utils.py +++ b/keras_cv/utils/fill_utils.py @@ -24,9 +24,8 @@ def _axis_mask(starts, ends, mask_len): axis_indices = tf.tile(axis_indices, [batch_size, 1]) # mask of index bounds - above_eq_starts = tf.greater_equal(axis_indices, tf.expand_dims(starts, 1)) - less_ends = tf.less(axis_indices, tf.expand_dims(ends, 1)) - return above_eq_starts & less_ends + axis_mask = tf.greater_equal(axis_indices, starts) & tf.less(axis_indices, ends) + return axis_mask def corners_to_mask(bounding_boxes, mask_shape): @@ -43,10 +42,7 @@ def corners_to_mask(bounding_boxes, mask_shape): indicate positions within bounding box coordinates. """ mask_width, mask_height = mask_shape - x0 = bounding_boxes[:, 0] - y0 = bounding_boxes[:, 1] - x1 = bounding_boxes[:, 2] - y1 = bounding_boxes[:, 3] + x0, y0, x1, y1 = tf.split(bounding_boxes, [1, 1, 1, 1], axis=-1) w_mask = _axis_mask(x0, x1, mask_width) h_mask = _axis_mask(y0, y1, mask_height) From 532889a306683eefffe843ef4d2646205180da5e Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Mon, 28 Feb 2022 22:57:45 +0100 Subject: [PATCH 34/43] minor refactor --- keras_cv/layers/preprocessing/grid_mask.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index 15dcb1b14f..6e9a81fad9 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -144,7 +144,7 @@ def _compute_grid_masks(self, input_shape): ratio = self.ratio rectangle_side_len = tf.cast((1 - ratio) * unit_sizes, tf.float32) - # x and y offsets for grid units + # sample x and y offsets for grid units randomly between 0 and unit_sizes delta_x = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) delta_y = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) delta_x = delta_x * unit_sizes @@ -199,11 +199,8 @@ def _compute_grid_masks(self, input_shape): # combine coordinates to (x0, y0, x1, y1) # with shape (num_rectangles_in_batch, 4) - corners0 = tf.stack([x0, y0], axis=-1) - corners1 = tf.stack([x1, y1], axis=-1) - corners0 = tf.reshape(corners0, [-1, 2]) - corners1 = tf.reshape(corners1, [-1, 2]) - corners = tf.concat([corners0, corners1], axis=1) + corners = tf.stack([x0, y0, x1, y1], axis=-1) + corners = tf.reshape(corners, [-1, 4]) # make mask for each rectangle mask_side_len = tf.cast(mask_side_len, tf.int32) @@ -252,6 +249,7 @@ def _grid_mask(self, images): # convert back to boolean mask masks = tf.cast(masks, tf.bool) + # fill if self.fill_mode == "constant": fill_value = tf.fill(input_shape, self.fill_value) else: From 37e676077089824a0b7b928826273474db9b14af Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Mon, 28 Feb 2022 23:18:43 +0100 Subject: [PATCH 35/43] formatting --- 
examples/layers/preprocessing/grid_mask_demo.py | 2 +- keras_cv/layers/preprocessing/grid_mask_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/layers/preprocessing/grid_mask_demo.py b/examples/layers/preprocessing/grid_mask_demo.py index 4b54470536..55691a2653 100644 --- a/examples/layers/preprocessing/grid_mask_demo.py +++ b/examples/layers/preprocessing/grid_mask_demo.py @@ -37,7 +37,7 @@ def main(): ratio="random", rotation_factor=0.5, fill_mode="gaussian_noise" ) train_ds = train_ds.map( - lambda x, y: (gridmask(x), y), + lambda x, y: (gridmask(x, training=True), y), num_parallel_calls=tf.data.AUTOTUNE, ) diff --git a/keras_cv/layers/preprocessing/grid_mask_test.py b/keras_cv/layers/preprocessing/grid_mask_test.py index 4e5f732af3..826c728ae2 100644 --- a/keras_cv/layers/preprocessing/grid_mask_test.py +++ b/keras_cv/layers/preprocessing/grid_mask_test.py @@ -108,4 +108,4 @@ def test_in_single_image(self): ) xs = layer(xs, training=True) self.assertTrue(tf.math.reduce_any(xs == 0.0)) - self.assertTrue(tf.math.reduce_any(xs == 1.0)) \ No newline at end of file + self.assertTrue(tf.math.reduce_any(xs == 1.0)) From 796cdd2af4dbeef27f973f67b1c9dcc37e5f8b27 Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Mon, 28 Feb 2022 23:21:51 +0100 Subject: [PATCH 36/43] black --- keras_cv/layers/preprocessing/grid_mask_test.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/keras_cv/layers/preprocessing/grid_mask_test.py b/keras_cv/layers/preprocessing/grid_mask_test.py index 826c728ae2..9261da47a8 100644 --- a/keras_cv/layers/preprocessing/grid_mask_test.py +++ b/keras_cv/layers/preprocessing/grid_mask_test.py @@ -101,11 +101,7 @@ def test_in_single_image(self): dtype=tf.float32, ) - layer = GridMask( - ratio="random", - fill_mode="constant", - fill_value=0.0 - ) + layer = GridMask(ratio="random", fill_mode="constant", fill_value=0.0) xs = layer(xs, training=True) self.assertTrue(tf.math.reduce_any(xs == 0.0)) self.assertTrue(tf.math.reduce_any(xs == 1.0)) From 420d6cbd8961f0c4b49dc2d6c88fe6e741d72406 Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Sat, 26 Mar 2022 14:36:40 +0100 Subject: [PATCH 37/43] merge with master --- .github/API_DESIGN.md | 55 +- .github/CALL_FOR_CONTRIBUTIONS.md | 7 + .github/images/runtime-plot.png | Bin 0 -> 19554 bytes .github/workflows/actions.yml | 20 + .gitignore | 1 + README.md | 2 +- ...an_average_precision_bucket_performance.py | 100 +++ .../mean_average_precision_performance.py | 102 +++ benchmarks/metrics/coco/recall_performance.py | 104 +++ .../vectorization_strategy_benchmark.py | 700 ++++++++++++++++++ .../preprocessing/channel_shuffle_demo.py | 67 ++ examples/layers/preprocessing/cut_mix_demo.py | 13 + .../layers/preprocessing/grid_mask_demo.py | 13 + examples/layers/preprocessing/mix_up_demo.py | 13 + .../random_color_degeneration_demo.py | 62 ++ .../preprocessing/random_cutout_demo.py | 13 + .../layers/preprocessing/random_shear_demo.py | 62 ++ keras_cv/__init__.py | 2 + keras_cv/layers/__init__.py | 25 + keras_cv/layers/preprocessing/__init__.py | 23 + .../layers/preprocessing/auto_contrast.py | 62 ++ .../preprocessing/auto_contrast_test.py | 114 +++ .../layers/preprocessing/channel_shuffle.py | 98 +++ .../preprocessing/channel_shuffle_test.py | 94 +++ keras_cv/layers/preprocessing/grayscale.py | 25 +- .../layers/preprocessing/posterization.py | 103 +++ .../preprocessing/posterization_test.py | 101 +++ .../random_color_degeneration.py | 58 ++ .../random_color_degeneration_test.py | 72 
++ .../layers/preprocessing/random_cutout.py | 70 +- .../preprocessing/random_cutout_test.py | 2 - .../layers/preprocessing/random_sharpness.py | 112 +++ .../preprocessing/random_sharpness_test.py | 71 ++ keras_cv/layers/preprocessing/random_shear.py | 123 +++ .../layers/preprocessing/random_shear_test.py | 38 + keras_cv/layers/preprocessing/solarization.py | 68 +- .../layers/preprocessing/solarization_test.py | 109 ++- keras_cv/layers/regularization/__init__.py | 15 + .../layers/regularization/dropblock_2d.py | 241 ++++++ .../regularization/dropblock_2d_test.py | 94 +++ .../metrics/coco/mean_average_precision.py | 26 +- keras_cv/metrics/coco/recall.py | 6 +- keras_cv/utils/conv_utils.py | 82 ++ keras_cv/utils/preprocessing.py | 203 +++++ keras_cv/utils/preprocessing_test.py | 50 ++ setup.cfg | 4 +- setup.py | 10 +- shell/clean.sh | 2 +- shell/format.sh | 3 +- shell/lint.sh | 4 +- 50 files changed, 3295 insertions(+), 149 deletions(-) create mode 100644 .github/images/runtime-plot.png create mode 100644 benchmarks/metrics/coco/mean_average_precision_bucket_performance.py create mode 100644 benchmarks/metrics/coco/mean_average_precision_performance.py create mode 100644 benchmarks/metrics/coco/recall_performance.py create mode 100644 benchmarks/vectorization_strategy_benchmark.py create mode 100644 examples/layers/preprocessing/channel_shuffle_demo.py create mode 100644 examples/layers/preprocessing/random_color_degeneration_demo.py create mode 100644 examples/layers/preprocessing/random_shear_demo.py create mode 100644 keras_cv/layers/preprocessing/auto_contrast.py create mode 100644 keras_cv/layers/preprocessing/auto_contrast_test.py create mode 100644 keras_cv/layers/preprocessing/channel_shuffle.py create mode 100644 keras_cv/layers/preprocessing/channel_shuffle_test.py create mode 100644 keras_cv/layers/preprocessing/posterization.py create mode 100644 keras_cv/layers/preprocessing/posterization_test.py create mode 100644 keras_cv/layers/preprocessing/random_color_degeneration.py create mode 100644 keras_cv/layers/preprocessing/random_color_degeneration_test.py create mode 100644 keras_cv/layers/preprocessing/random_sharpness.py create mode 100644 keras_cv/layers/preprocessing/random_sharpness_test.py create mode 100644 keras_cv/layers/preprocessing/random_shear.py create mode 100644 keras_cv/layers/preprocessing/random_shear_test.py create mode 100644 keras_cv/layers/regularization/__init__.py create mode 100644 keras_cv/layers/regularization/dropblock_2d.py create mode 100644 keras_cv/layers/regularization/dropblock_2d_test.py create mode 100644 keras_cv/utils/conv_utils.py create mode 100644 keras_cv/utils/preprocessing.py create mode 100644 keras_cv/utils/preprocessing_test.py diff --git a/.github/API_DESIGN.md b/.github/API_DESIGN.md index ec300b3099..190c4af064 100644 --- a/.github/API_DESIGN.md +++ b/.github/API_DESIGN.md @@ -1,29 +1,56 @@ # API Design Guidelines -In general, KerasCV abides to the [API design guidelines of Keras](https://github.com/keras-team/governance/blob/master/keras_api_design_guidelines.md). +In general, KerasCV abides to the + [API design guidelines of Keras](https://github.com/keras-team/governance/blob/master/keras_api_design_guidelines.md). There are a few API guidelines that apply only to KerasCV. These are discussed in this document. -## Label Names -When working with `bounding_box` and `segmentation_map` labels the abbreviations `bbox` and -`segm` are often used. In KerasCV, we will *not* be using these abbreviations. 
This is done
-to ensure full consistency in our naming convention. While the team is fond of the abbreviation
-`bbox`, we are loss fond of `segm`. In order to ensure full consistency, we have decided to
-use the full names for label types in our code base.
-
-## Preprocessing Layers
-### Color Based Preprocessing Layers
+# Label Names
+When working with `bounding_box` and `segmentation_map` labels the
+abbreviations `bbox` and `segm` are often used. In KerasCV, we will *not* be
+using these abbreviations. This is done to ensure full consistency in our
+naming convention. While the team is fond of the abbreviation `bbox`, we are
+less fond of `segm`. In order to ensure full consistency, we have decided to
+use the full names for label types in our code base.
+
+# Preprocessing Layers
+## BaseImageAugmentationLayer
+When implementing preprocessing, we encourage users to subclass
+`tf.keras.__internal__.layers.BaseImageAugmentationLayer`. This layer provides
+a common `call()` method, auto vectorization, and more.
+
+When subclassing `BaseImageAugmentationLayer`, several methods can be
+overridden:
+
+- `BaseImageAugmentationLayer.augment_image()` must be overridden
+- `augment_label()` allows updates to be made to labels
+- `augment_bounding_box()` allows updates to be made to bounding boxes
+
+When a canonical layer subclassing BaseImageAugmentationLayer is available, a
+link to it will be added here.
+
+## Vectorization
+`BaseImageAugmentationLayer` requires you to implement augmentations on an
+image-wise basis instead of using a vectorized approach. This design choice
+was made based on the results found in the
+[vectorization\_strategy\_benchmark.py](../benchmarks/vectorization_strategy_benchmark.py)
+benchmark.
+
+In short, the benchmark shows that making use of `tf.vectorized_map()` performs
+almost identically to a manually vectorized implementation. As such, we have
+decided to rely on `tf.vectorized_map()` for performance.
+
+![Results of vectorization strategy benchmark](images/runtime-plot.png)
+
+## Color Based Preprocessing Layers
 Some preprocessing layers in KerasCV perform color based transformations. This
-includes `RandomBrightness`, `Equalize`, `Solarization`, and more. Preprocessing
-layers that perform color based transformations make the following assumptions:
+includes `RandomBrightness`, `Equalize`, `Solarization`, and more.
+Preprocessing layers that perform color based transformations make the
+following assumptions:
+## Default Parameters
+Default parameters should **not** be chosen based on which values achieve the
+best scores on any specific dataset. Instead, parameters should be required,
+with a recommended value in the docstring. This is to discourage users from
+widely adopting the hyperparameters required to do well on ImageNet2k on
+datasets that require different tuning.
+
 ## Preprocessing Layers
 KerasCV preprocessing layers allow for construction of state of the art computer
 vision data augmentation pipelines. Our [CutMix](https://github.com/keras-team/keras-cv/blob/master/keras_cv/layers/preprocessing/cut_mix.py) implementation serves as a sample preprocessing
diff --git a/.github/images/runtime-plot.png b/.github/images/runtime-plot.png
new file mode 100644
index 0000000000000000000000000000000000000000..41487d56791dc4192e9ed972dad54cf2a6ecc71f
GIT binary patch
literal 19554
[19,554 bytes of base85-encoded PNG data omitted: .github/images/runtime-plot.png, the vectorization benchmark runtime plot referenced in API_DESIGN.md]
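Another editorial aside, this time on the Default Parameters guideline above: a hedged sketch of the "required argument, recommended value in the docstring" pattern. The layer name, argument, and suggested value are invented for illustration and appear nowhere in this patch.

```python
import tensorflow as tf


class IllustrativeAugment(tf.keras.layers.Layer):
    """Applies a made-up augmentation of configurable strength.

    Args:
        factor: strength of the augmentation. Required; a value around 0.3
            is a reasonable starting point, but tune it per dataset rather
            than relying on a library-wide default.
    """

    def __init__(self, factor, **kwargs):  # required on purpose: no default
        super().__init__(**kwargs)
        self.factor = factor
```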
zl)Z4TnY}{#XYiPZa9$#Xm(Bv_x$G~bbyS1kghUp~AjlC(oq#T8w$FcIJO^B0W(RUs zEhq>PlMjM?=}vmmrwb_#e@i&{pe>0z`5*{_0uwR$AP9m26HYz|f}p^JlMjL*C@|sV kgCGbBOgQ-<2!aChKip%&p5vFEP5=M^07*qoM6N<$f?6PxumAu6 literal 0 HcmV?d00001 diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index 5c35e53660..fe298e2e5a 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -63,3 +63,23 @@ jobs: pip install -e ".[tests]" --progress-bar off --upgrade - name: Lint run: bash shell/lint.sh + deploy: + needs: [test, format] + if: github.event_name == 'release' && github.event.action == 'created' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: 3.7 + - name: Install dependencies + run: | + python -m pip install --upgrade pip setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + python setup.py sdist bdist_wheel + twine upload dist/* diff --git a/.gitignore b/.gitignore index e2058ddc37..55509b1c35 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ keras_cv.egg-info/ +dist/ .DS_Store build/ *.swp diff --git a/README.md b/README.md index 584503907b..65008bda88 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # KerasCV [![](https://github.com/keras-team/keras-cv/workflows/Tests/badge.svg?branch=master)](https://github.com/keras-team/keras-cv/actions?query=workflow%3ATests+branch%3Amaster) ![Python](https://img.shields.io/badge/python-v3.7.0+-success.svg) -![Tensorflow](https://img.shields.io/badge/tensorflow-v2.5.0+-success.svg) +![Tensorflow](https://img.shields.io/badge/tensorflow-v2.8.0+-success.svg) [![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/keras-team/keras-cv/issues) KerasCV is a repository of modular building blocks (layers, metrics, losses, data-augmentation) that diff --git a/benchmarks/metrics/coco/mean_average_precision_bucket_performance.py b/benchmarks/metrics/coco/mean_average_precision_bucket_performance.py new file mode 100644 index 0000000000..40238853f5 --- /dev/null +++ b/benchmarks/metrics/coco/mean_average_precision_bucket_performance.py @@ -0,0 +1,100 @@ +import math +import random +import time + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +import tensorflow as tf + +import keras_cv +from keras_cv.metrics import coco + + +def produce_random_data(include_confidence=False, num_images=128, num_classes=20): + """Generates a fake list of bounding boxes for use in this test. + + Returns: + a tensor list of size [128, 25, 5/6]. This represents 128 images, 25 bboxes + and 5/6 dimensions to represent each bbox depending on if confidence is + set. 
+ """ + images = [] + for _ in range(num_images): + num_boxes = math.floor(25 * random.uniform(0, 1)) + classes = np.floor(np.random.rand(num_boxes, 1) * num_classes) + bboxes = np.random.rand(num_boxes, 4) + boxes = np.concatenate([bboxes, classes], axis=-1) + if include_confidence: + confidence = np.random.rand(num_boxes, 1) + boxes = np.concatenate([boxes, confidence], axis=-1) + images.append( + keras_cv.utils.bounding_box.xywh_to_corners( + tf.constant(boxes, dtype=tf.float32) + ) + ) + + images = [ + keras_cv.utils.bounding_box.pad_bounding_box_batch_to_shape( + x, [25, images[0].shape[1]] + ) + for x in images + ] + return tf.stack(images, axis=0) + + +y_true = produce_random_data() +y_pred = produce_random_data(include_confidence=True) +class_ids = list(range(20)) + +bucket_values = [500, 1000, 2000, 3500, 5000, 7500, 10000] + +update_state_runtimes = [] +result_runtimes = [] +end_to_end_runtimes = [] + +for buckets in bucket_values: + metric = coco.COCOMeanAveragePrecision(class_ids, num_buckets=buckets) + # warm up + metric.update_state(y_true, y_pred) + metric.result() + + start = time.time() + metric.update_state(y_true, y_pred) + update_state_done = time.time() + r = metric.result() + end = time.time() + + update_state_runtimes.append(update_state_done - start) + result_runtimes.append(end - update_state_done) + end_to_end_runtimes.append(end - start) + + print("end_to_end_runtimes", end_to_end_runtimes) + +data = pd.DataFrame( + { + "bucket_values": bucket_values, + "update_state_runtimes": update_state_runtimes, + "result_runtimes": result_runtimes, + "end_to_end_runtimes": end_to_end_runtimes, + } +) + +sns.lineplot(data=data, x="bucket_values", y="update_state_runtimes") +plt.xlabel("Number of Confidence Buckets") +plt.ylabel("update_state() runtime (seconds)") +plt.title("Runtime of update_state()") +plt.show() + +sns.lineplot(data=data, x="bucket_values", y="result_runtimes") +plt.xlabel("Number of Confidence Buckets") +plt.ylabel("result() runtime (seconds)") +plt.title("Runtime of result()") +plt.show() + +sns.lineplot(data=data, x="bucket_values", y="end_to_end_runtimes") +plt.xlabel("Number of Confidence Buckets") +plt.ylabel("End to end runtime (seconds)") +plt.title("Runtimes of update_state() followed by result()") +plt.show() diff --git a/benchmarks/metrics/coco/mean_average_precision_performance.py b/benchmarks/metrics/coco/mean_average_precision_performance.py new file mode 100644 index 0000000000..0b8ae67e7c --- /dev/null +++ b/benchmarks/metrics/coco/mean_average_precision_performance.py @@ -0,0 +1,102 @@ +import math +import random +import time + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +import tensorflow as tf + +import keras_cv +from keras_cv.metrics import coco + + +def produce_random_data(include_confidence=False, num_images=128, num_classes=20): + """Generates a fake list of bounding boxes for use in this test. + + Returns: + a tensor list of size [128, 25, 5/6]. This represents 128 images, 25 bboxes + and 5/6 dimensions to represent each bbox depending on if confidence is + set. 
+ """ + images = [] + for _ in range(num_images): + num_boxes = math.floor(25 * random.uniform(0, 1)) + classes = np.floor(np.random.rand(num_boxes, 1) * num_classes) + bboxes = np.random.rand(num_boxes, 4) + boxes = np.concatenate([bboxes, classes], axis=-1) + if include_confidence: + confidence = np.random.rand(num_boxes, 1) + boxes = np.concatenate([boxes, confidence], axis=-1) + images.append( + keras_cv.utils.bounding_box.xywh_to_corners( + tf.constant(boxes, dtype=tf.float32) + ) + ) + + images = [ + keras_cv.utils.bounding_box.pad_bounding_box_batch_to_shape( + x, [25, images[0].shape[1]] + ) + for x in images + ] + return tf.stack(images, axis=0) + + +y_true = produce_random_data() +y_pred = produce_random_data(include_confidence=True) +class_ids = list(range(20)) + +n_images = [128, 256, 512, 512 + 256, 1024] + +update_state_runtimes = [] +result_runtimes = [] +end_to_end_runtimes = [] + +for images in n_images: + y_true = produce_random_data(num_images=images) + y_pred = produce_random_data(num_images=images, include_confidence=True) + metric = coco.COCOMeanAveragePrecision(class_ids) + # warm up + metric.update_state(y_true, y_pred) + metric.result() + + start = time.time() + metric.update_state(y_true, y_pred) + update_state_done = time.time() + r = metric.result() + end = time.time() + + update_state_runtimes.append(update_state_done - start) + result_runtimes.append(end - update_state_done) + end_to_end_runtimes.append(end - start) + + print("end_to_end_runtimes", end_to_end_runtimes) + +data = pd.DataFrame( + { + "n_images": n_images, + "update_state_runtimes": update_state_runtimes, + "result_runtimes": result_runtimes, + "end_to_end_runtimes": end_to_end_runtimes, + } +) + +sns.lineplot(data=data, x="n_images", y="update_state_runtimes") +plt.xlabel("Number of Images") +plt.ylabel("update_state() runtime (seconds)") +plt.title("Runtime of update_state()") +plt.show() + +sns.lineplot(data=data, x="n_images", y="result_runtimes") +plt.xlabel("Number of Images") +plt.ylabel("result() runtime (seconds)") +plt.title("Runtime of result()") +plt.show() + +sns.lineplot(data=data, x="n_images", y="end_to_end_runtimes") +plt.xlabel("Number of Images") +plt.ylabel("End to end runtime (seconds)") +plt.title("Runtimes of update_state() followed by result()") +plt.show() diff --git a/benchmarks/metrics/coco/recall_performance.py b/benchmarks/metrics/coco/recall_performance.py new file mode 100644 index 0000000000..07fd84bfa8 --- /dev/null +++ b/benchmarks/metrics/coco/recall_performance.py @@ -0,0 +1,104 @@ +import math +import random +import time + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +import tensorflow as tf + +import keras_cv +from keras_cv.metrics import coco + + +def produce_random_data(include_confidence=False, num_images=128, num_classes=20): + """Generates a fake list of bounding boxes for use in this test. + + Returns: + a tensor list of size [128, 25, 5/6]. This represents 128 images, 25 bboxes + and 5/6 dimensions to represent each bbox depending on if confidence is + set. 
+ """ + images = [] + for _ in range(num_images): + num_boxes = math.floor(25 * random.uniform(0, 1)) + classes = np.floor(np.random.rand(num_boxes, 1) * num_classes) + bboxes = np.random.rand(num_boxes, 4) + boxes = np.concatenate([bboxes, classes], axis=-1) + if include_confidence: + confidence = np.random.rand(num_boxes, 1) + boxes = np.concatenate([boxes, confidence], axis=-1) + images.append( + keras_cv.utils.bounding_box.xywh_to_corners( + tf.constant(boxes, dtype=tf.float32) + ) + ) + + images = [ + keras_cv.utils.bounding_box.pad_bounding_box_batch_to_shape( + x, [25, images[0].shape[1]] + ) + for x in images + ] + return tf.stack(images, axis=0) + + +y_true = produce_random_data() +y_pred = produce_random_data(include_confidence=True) +class_ids = list(range(20)) + +n_images = [128, 256, 512, 512 + 256, 1024] + + +update_state_runtimes = [] +result_runtimes = [] +end_to_end_runtimes = [] + +for images in n_images: + y_true = produce_random_data(num_images=images) + y_pred = produce_random_data(num_images=images, include_confidence=True) + metric = coco.COCORecall(class_ids) + # warm up + metric.update_state(y_true, y_pred) + metric.result() + + start = time.time() + metric.update_state(y_true, y_pred) + update_state_done = time.time() + r = metric.result() + end = time.time() + + update_state_runtimes.append(update_state_done - start) + result_runtimes.append(end - update_state_done) + end_to_end_runtimes.append(end - start) + + print("end_to_end_runtimes", end_to_end_runtimes) + + +data = pd.DataFrame( + { + "n_images": n_images, + "update_state_runtimes": update_state_runtimes, + "result_runtimes": result_runtimes, + "end_to_end_runtimes": end_to_end_runtimes, + } +) + +sns.lineplot(data=data, x="n_images", y="update_state_runtimes") +plt.xlabel("Number of Images") +plt.ylabel("update_state() runtime (seconds)") +plt.title("Runtime of update_state()") +plt.show() + +sns.lineplot(data=data, x="n_images", y="result_runtimes") +plt.xlabel("Number of Images") +plt.ylabel("result() runtime (seconds)") +plt.title("Runtime of result()") +plt.show() + +sns.lineplot(data=data, x="n_images", y="end_to_end_runtimes") +plt.xlabel("Number of Images") +plt.ylabel("End to end runtime (seconds)") +plt.title("Runtimes of update_state() followed by result()") +plt.show() diff --git a/benchmarks/vectorization_strategy_benchmark.py b/benchmarks/vectorization_strategy_benchmark.py new file mode 100644 index 0000000000..8dbafee670 --- /dev/null +++ b/benchmarks/vectorization_strategy_benchmark.py @@ -0,0 +1,700 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
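An editorial aside before the benchmark code: the script below times three strategies for applying a per-image augmentation to a whole batch. The miniature comparison here is illustrative only, not part of the patch, but it shows the core difference between the strategies being benchmarked.

```python
import tensorflow as tf

images = tf.random.uniform((8, 32, 32, 3))


def per_image_op(image):
    # Stand-in for any per-image augmentation; a trivial brightness shift.
    return image + 0.1


# Strategy 1: sequential map over the batch (MapFnRandomCutout below).
out_map_fn = tf.map_fn(per_image_op, images)

# Strategy 2: pfor-based auto-vectorization of the same per-image function
# (VMapRandomCutout below).
out_vmap = tf.vectorized_map(per_image_op, images)

# Strategy 3: manual vectorization, writing the op on the whole batch
# (VectorizedRandomCutout below).
out_manual = images + 0.1
```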
+""" +# Setup/utils +""" +import time + +import matplotlib.pyplot as plt +import tensorflow as tf +import tensorflow.keras as keras +import tensorflow.keras.layers as layers +from tensorflow.keras import backend + +from keras_cv.utils import bounding_box +from keras_cv.utils import fill_utils + + +def single_rectangle_mask(corners, mask_shape): + """Computes masks of rectangles + + Args: + corners: tensor of rectangle coordinates with shape (batch_size, 4) in + corners format (x0, y0, x1, y1). + mask_shape: a shape tuple as (width, height) indicating the output + width and height of masks. + + Returns: + boolean masks with shape (batch_size, width, height) where True values + indicate positions within rectangle coordinates. + """ + # add broadcasting axes + corners = corners[..., tf.newaxis, tf.newaxis] + + # split coordinates + x0 = corners[0] + y0 = corners[1] + x1 = corners[2] + y1 = corners[3] + + # repeat height and width + width, height = mask_shape + x0_rep = tf.repeat(x0, height, axis=0) + y0_rep = tf.repeat(y0, width, axis=1) + x1_rep = tf.repeat(x1, height, axis=0) + y1_rep = tf.repeat(y1, width, axis=1) + + # range grid + range_row = tf.range(0, height, dtype=corners.dtype) + range_col = tf.range(0, width, dtype=corners.dtype) + range_row = range_row[:, tf.newaxis] + range_col = range_col[tf.newaxis, :] + + # boolean masks + mask_x0 = tf.less_equal(x0_rep, range_col) + mask_y0 = tf.less_equal(y0_rep, range_row) + mask_x1 = tf.less(range_col, x1_rep) + mask_y1 = tf.less(range_row, y1_rep) + + masks = mask_x0 & mask_y0 & mask_x1 & mask_y1 + + return masks + + +def fill_single_rectangle(image, centers_x, centers_y, widths, heights, fill_values): + """Fill rectangles with fill value into images. + + Args: + images: Tensor of images to fill rectangles into. + centers_x: Tensor of positions of the rectangle centers on the x-axis. + centers_y: Tensor of positions of the rectangle centers on the y-axis. + widths: Tensor of widths of the rectangles + heights: Tensor of heights of the rectangles + fill_values: Tensor with same shape as images to get rectangle fill from. + Returns: + images with filled rectangles. + """ + images_shape = tf.shape(image) + images_height = images_shape[0] + images_width = images_shape[1] + + xywh = tf.stack([centers_x, centers_y, widths, heights], axis=0) + xywh = tf.cast(xywh, tf.float32) + corners = bounding_box.xywh_to_corners(xywh) + + mask_shape = (images_width, images_height) + is_rectangle = single_rectangle_mask(corners, mask_shape) + is_rectangle = tf.expand_dims(is_rectangle, -1) + + images = tf.where(is_rectangle, fill_values, image) + return images + + +""" +# Layer Implementations +## Fully Vectorized +""" + + +class VectorizedRandomCutout(layers.Layer): + def __init__( + self, + height_factor, + width_factor, + fill_mode="constant", + fill_value=0.0, + seed=None, + **kwargs, + ): + super().__init__(**kwargs) + + self.height_lower, self.height_upper = self._parse_bounds(height_factor) + self.width_lower, self.width_upper = self._parse_bounds(width_factor) + + if fill_mode not in ["gaussian_noise", "constant"]: + raise ValueError( + '`fill_mode` should be "gaussian_noise" ' + f'or "constant". 
Got `fill_mode`={fill_mode}' + ) + + if not isinstance(self.height_lower, type(self.height_upper)): + raise ValueError( + "`height_factor` must have lower bound and upper bound " + "with same type, got {} and {}".format( + type(self.height_lower), type(self.height_upper) + ) + ) + if not isinstance(self.width_lower, type(self.width_upper)): + raise ValueError( + "`width_factor` must have lower bound and upper bound " + "with same type, got {} and {}".format( + type(self.width_lower), type(self.width_upper) + ) + ) + + if self.height_upper < self.height_lower: + raise ValueError( + "`height_factor` cannot have upper bound less than " + "lower bound, got {}".format(height_factor) + ) + self._height_is_float = isinstance(self.height_lower, float) + if self._height_is_float: + if not self.height_lower >= 0.0 or not self.height_upper <= 1.0: + raise ValueError( + "`height_factor` must have values between [0, 1] " + "when is float, got {}".format(height_factor) + ) + + if self.width_upper < self.width_lower: + raise ValueError( + "`width_factor` cannot have upper bound less than " + "lower bound, got {}".format(width_factor) + ) + self._width_is_float = isinstance(self.width_lower, float) + if self._width_is_float: + if not self.width_lower >= 0.0 or not self.width_upper <= 1.0: + raise ValueError( + "`width_factor` must have values between [0, 1] " + "when is float, got {}".format(width_factor) + ) + + self.fill_mode = fill_mode + self.fill_value = fill_value + self.seed = seed + + def _parse_bounds(self, factor): + if isinstance(factor, (tuple, list)): + return factor[0], factor[1] + else: + return type(factor)(0), factor + + @tf.function + def call(self, inputs, training=True): + if training is None: + training = backend.learning_phase() + + augment = lambda: self._random_cutout(inputs) + no_augment = lambda: inputs + return tf.cond(tf.cast(training, tf.bool), augment, no_augment) + + def _random_cutout(self, inputs): + """Apply random cutout.""" + center_x, center_y = self._compute_rectangle_position(inputs) + rectangle_height, rectangle_width = self._compute_rectangle_size(inputs) + rectangle_fill = self._compute_rectangle_fill(inputs) + inputs = fill_utils.fill_rectangle( + inputs, + center_x, + center_y, + rectangle_width, + rectangle_height, + rectangle_fill, + ) + return inputs + + def _compute_rectangle_position(self, inputs): + input_shape = tf.shape(inputs) + batch_size, image_height, image_width = ( + input_shape[0], + input_shape[1], + input_shape[2], + ) + center_x = tf.random.uniform( + shape=[batch_size], + minval=0, + maxval=image_width, + dtype=tf.int32, + seed=self.seed, + ) + center_y = tf.random.uniform( + shape=[batch_size], + minval=0, + maxval=image_height, + dtype=tf.int32, + seed=self.seed, + ) + return center_x, center_y + + def _compute_rectangle_size(self, inputs): + input_shape = tf.shape(inputs) + batch_size, image_height, image_width = ( + input_shape[0], + input_shape[1], + input_shape[2], + ) + height = tf.random.uniform( + [batch_size], + minval=self.height_lower, + maxval=self.height_upper, + dtype=tf.float32, + ) + width = tf.random.uniform( + [batch_size], + minval=self.width_lower, + maxval=self.width_upper, + dtype=tf.float32, + ) + + if self._height_is_float: + height = height * tf.cast(image_height, tf.float32) + + if self._width_is_float: + width = width * tf.cast(image_width, tf.float32) + + height = tf.cast(tf.math.ceil(height), tf.int32) + width = tf.cast(tf.math.ceil(width), tf.int32) + + height = tf.minimum(height, image_height) + width = 
tf.minimum(width, image_width) + + return height, width + + def _compute_rectangle_fill(self, inputs): + input_shape = tf.shape(inputs) + if self.fill_mode == "constant": + fill_value = tf.fill(input_shape, self.fill_value) + else: + # gaussian noise + fill_value = tf.random.normal(input_shape) + + return fill_value + + def get_config(self): + config = { + "height_factor": self.height_factor, + "width_factor": self.width_factor, + "fill_mode": self.fill_mode, + "fill_value": self.fill_value, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +""" +## tf.map_fn +""" + + +class MapFnRandomCutout(layers.Layer): + def __init__( + self, + height_factor, + width_factor, + fill_mode="constant", + fill_value=0.0, + seed=None, + **kwargs, + ): + super().__init__(**kwargs) + + self.height_lower, self.height_upper = self._parse_bounds(height_factor) + self.width_lower, self.width_upper = self._parse_bounds(width_factor) + + if fill_mode not in ["gaussian_noise", "constant"]: + raise ValueError( + '`fill_mode` should be "gaussian_noise" ' + f'or "constant". Got `fill_mode`={fill_mode}' + ) + + if not isinstance(self.height_lower, type(self.height_upper)): + raise ValueError( + "`height_factor` must have lower bound and upper bound " + "with same type, got {} and {}".format( + type(self.height_lower), type(self.height_upper) + ) + ) + if not isinstance(self.width_lower, type(self.width_upper)): + raise ValueError( + "`width_factor` must have lower bound and upper bound " + "with same type, got {} and {}".format( + type(self.width_lower), type(self.width_upper) + ) + ) + + if self.height_upper < self.height_lower: + raise ValueError( + "`height_factor` cannot have upper bound less than " + "lower bound, got {}".format(height_factor) + ) + self._height_is_float = isinstance(self.height_lower, float) + if self._height_is_float: + if not self.height_lower >= 0.0 or not self.height_upper <= 1.0: + raise ValueError( + "`height_factor` must have values between [0, 1] " + "when is float, got {}".format(height_factor) + ) + + if self.width_upper < self.width_lower: + raise ValueError( + "`width_factor` cannot have upper bound less than " + "lower bound, got {}".format(width_factor) + ) + self._width_is_float = isinstance(self.width_lower, float) + if self._width_is_float: + if not self.width_lower >= 0.0 or not self.width_upper <= 1.0: + raise ValueError( + "`width_factor` must have values between [0, 1] " + "when is float, got {}".format(width_factor) + ) + + self.fill_mode = fill_mode + self.fill_value = fill_value + self.seed = seed + + def _parse_bounds(self, factor): + if isinstance(factor, (tuple, list)): + return factor[0], factor[1] + else: + return type(factor)(0), factor + + @tf.function + def call(self, inputs, training=True): + + augment = lambda: tf.map_fn(self._random_cutout, inputs) + no_augment = lambda: inputs + return tf.cond(tf.cast(training, tf.bool), augment, no_augment) + + def _random_cutout(self, input): + center_x, center_y = self._compute_rectangle_position(input) + rectangle_height, rectangle_width = self._compute_rectangle_size(input) + rectangle_fill = self._compute_rectangle_fill(input) + input = fill_single_rectangle( + input, + center_x, + center_y, + rectangle_width, + rectangle_height, + rectangle_fill, + ) + return input + + def _compute_rectangle_position(self, inputs): + input_shape = tf.shape(inputs) + image_height, image_width = ( + input_shape[0], + input_shape[1], + ) + center_x = 
tf.random.uniform( + shape=[], + minval=0, + maxval=image_width, + dtype=tf.int32, + seed=self.seed, + ) + center_y = tf.random.uniform( + shape=[], + minval=0, + maxval=image_height, + dtype=tf.int32, + seed=self.seed, + ) + return center_x, center_y + + def _compute_rectangle_size(self, inputs): + input_shape = tf.shape(inputs) + image_height, image_width = ( + input_shape[0], + input_shape[1], + ) + height = tf.random.uniform( + [], + minval=self.height_lower, + maxval=self.height_upper, + dtype=tf.float32, + ) + width = tf.random.uniform( + [], + minval=self.width_lower, + maxval=self.width_upper, + dtype=tf.float32, + ) + + if self._height_is_float: + height = height * tf.cast(image_height, tf.float32) + + if self._width_is_float: + width = width * tf.cast(image_width, tf.float32) + + height = tf.cast(tf.math.ceil(height), tf.int32) + width = tf.cast(tf.math.ceil(width), tf.int32) + + height = tf.minimum(height, image_height) + width = tf.minimum(width, image_width) + + return height, width + + def _compute_rectangle_fill(self, inputs): + input_shape = tf.shape(inputs) + if self.fill_mode == "constant": + fill_value = tf.fill(input_shape, self.fill_value) + else: + # gaussian noise + fill_value = tf.random.normal(input_shape) + + return fill_value + + def get_config(self): + config = { + "height_factor": self.height_factor, + "width_factor": self.width_factor, + "fill_mode": self.fill_mode, + "fill_value": self.fill_value, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +""" +## tf.vectorized_map +""" + + +class VMapRandomCutout(layers.Layer): + def __init__( + self, + height_factor, + width_factor, + fill_mode="constant", + fill_value=0.0, + seed=None, + **kwargs, + ): + super().__init__(**kwargs) + + self.height_lower, self.height_upper = self._parse_bounds(height_factor) + self.width_lower, self.width_upper = self._parse_bounds(width_factor) + + if fill_mode not in ["gaussian_noise", "constant"]: + raise ValueError( + '`fill_mode` should be "gaussian_noise" ' + f'or "constant". 
Got `fill_mode`={fill_mode}' + ) + + if not isinstance(self.height_lower, type(self.height_upper)): + raise ValueError( + "`height_factor` must have lower bound and upper bound " + "with same type, got {} and {}".format( + type(self.height_lower), type(self.height_upper) + ) + ) + if not isinstance(self.width_lower, type(self.width_upper)): + raise ValueError( + "`width_factor` must have lower bound and upper bound " + "with same type, got {} and {}".format( + type(self.width_lower), type(self.width_upper) + ) + ) + + if self.height_upper < self.height_lower: + raise ValueError( + "`height_factor` cannot have upper bound less than " + "lower bound, got {}".format(height_factor) + ) + self._height_is_float = isinstance(self.height_lower, float) + if self._height_is_float: + if not self.height_lower >= 0.0 or not self.height_upper <= 1.0: + raise ValueError( + "`height_factor` must have values between [0, 1] " + "when is float, got {}".format(height_factor) + ) + + if self.width_upper < self.width_lower: + raise ValueError( + "`width_factor` cannot have upper bound less than " + "lower bound, got {}".format(width_factor) + ) + self._width_is_float = isinstance(self.width_lower, float) + if self._width_is_float: + if not self.width_lower >= 0.0 or not self.width_upper <= 1.0: + raise ValueError( + "`width_factor` must have values between [0, 1] " + "when is float, got {}".format(width_factor) + ) + + self.fill_mode = fill_mode + self.fill_value = fill_value + self.seed = seed + + def _parse_bounds(self, factor): + if isinstance(factor, (tuple, list)): + return factor[0], factor[1] + else: + return type(factor)(0), factor + + @tf.function + def call(self, inputs, training=True): + augment = lambda: tf.vectorized_map(self._random_cutout, inputs) + no_augment = lambda: inputs + return tf.cond(tf.cast(training, tf.bool), augment, no_augment) + + def _random_cutout(self, input): + center_x, center_y = self._compute_rectangle_position(input) + rectangle_height, rectangle_width = self._compute_rectangle_size(input) + rectangle_fill = self._compute_rectangle_fill(input) + input = fill_single_rectangle( + input, + center_x, + center_y, + rectangle_width, + rectangle_height, + rectangle_fill, + ) + return input + + def _compute_rectangle_position(self, inputs): + input_shape = tf.shape(inputs) + image_height, image_width = ( + input_shape[0], + input_shape[1], + ) + center_x = tf.random.uniform( + shape=[], + minval=0, + maxval=image_width, + dtype=tf.int32, + seed=self.seed, + ) + center_y = tf.random.uniform( + shape=[], + minval=0, + maxval=image_height, + dtype=tf.int32, + seed=self.seed, + ) + return center_x, center_y + + def _compute_rectangle_size(self, inputs): + input_shape = tf.shape(inputs) + image_height, image_width = ( + input_shape[0], + input_shape[1], + ) + height = tf.random.uniform( + [], + minval=self.height_lower, + maxval=self.height_upper, + dtype=tf.float32, + ) + width = tf.random.uniform( + [], + minval=self.width_lower, + maxval=self.width_upper, + dtype=tf.float32, + ) + + if self._height_is_float: + height = height * tf.cast(image_height, tf.float32) + + if self._width_is_float: + width = width * tf.cast(image_width, tf.float32) + + height = tf.cast(tf.math.ceil(height), tf.int32) + width = tf.cast(tf.math.ceil(width), tf.int32) + + height = tf.minimum(height, image_height) + width = tf.minimum(width, image_width) + + return height, width + + def _compute_rectangle_fill(self, inputs): + input_shape = tf.shape(inputs) + if self.fill_mode == "constant": + fill_value = 
tf.fill(input_shape, self.fill_value) + else: + # gaussian noise + fill_value = tf.random.normal(input_shape) + + return fill_value + + def get_config(self): + config = { + "height_factor": self.height_factor, + "width_factor": self.width_factor, + "fill_mode": self.fill_mode, + "fill_value": self.fill_value, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +""" +# Benchmarking +""" +(x_train, _), _ = keras.datasets.cifar10.load_data() +x_train = x_train.astype(float) + +x_train.shape + + +images = [] + +num_images = [1000, 2000, 5000, 10000, 25000, 37500, 50000] + +results = {} + +for aug in [VectorizedRandomCutout, VMapRandomCutout, MapFnRandomCutout]: + c = aug.__name__ + layer = aug(0.2, 0.2) + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + layer(x_train[:n_images]) + + t0 = time.time() + r1 = layer(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + + results[c] = runtimes + +plt.figure() +for key in results: + plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + +plt.ylabel("Runtime (seconds)") +plt.legend() +plt.show() + +""" +# Sanity check +all of these should have comparable outputs +""" + +images = [] +for aug in [VectorizedRandomCutout, VMapRandomCutout, MapFnRandomCutout]: + layer = aug(0.5, 0.5) + images.append(layer(x_train[:3])) +images = [y for x in images for y in x] + + +plt.figure(figsize=(8, 8)) +for i in range(9): + plt.subplot(3, 3, i + 1) + plt.imshow(images[i].numpy().astype("uint8")) + plt.axis("off") +plt.show() + +""" +# Extra notes + +## Warnings +it would be really annoying as a user to use an official keras_cv component and get +warned that "RandomUniform" or "RandomUniformInt" inside pfor may not get the same +output. +""" diff --git a/examples/layers/preprocessing/channel_shuffle_demo.py b/examples/layers/preprocessing/channel_shuffle_demo.py new file mode 100644 index 0000000000..02604d27b0 --- /dev/null +++ b/examples/layers/preprocessing/channel_shuffle_demo.py @@ -0,0 +1,67 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""channel_shuffle_demo.py shows how to use the ChannelShuffle preprocessing layer. + +Operates on the oxford_flowers102 dataset. In this script the flowers +are loaded, then are passed through the preprocessing layers. +Finally, they are shown using matplotlib. 
+""" + +import matplotlib.pyplot as plt +import tensorflow as tf +import tensorflow_datasets as tfds + +from keras_cv.layers import preprocessing + +IMG_SIZE = (224, 224) +BATCH_SIZE = 64 + + +def resize(image, label, num_classes=10): + image = tf.image.resize(image, IMG_SIZE) + label = tf.one_hot(label, num_classes) + return image, label + + +def main(): + data, ds_info = tfds.load("oxford_flowers102", with_info=True, as_supervised=True) + train_ds = data["train"] + + num_classes = ds_info.features["label"].num_classes + + train_ds = ( + train_ds.map(lambda x, y: resize(x, y, num_classes=num_classes)) + .shuffle(10 * BATCH_SIZE) + .batch(BATCH_SIZE) + ) + + channel_shuffle = preprocessing.ChannelShuffle() + train_ds = train_ds.map( + lambda x, y: (channel_shuffle(x, training=True), y), + num_parallel_calls=tf.data.AUTOTUNE, + ) + + for images, labels in train_ds.take(1): + plt.figure(figsize=(8, 8)) + for i in range(9): + plt.subplot(3, 3, i + 1) + plt.imshow(images[i].numpy().astype("uint8")) + plt.axis("off") + plt.show() + + +if __name__ == "__main__": + main() diff --git a/examples/layers/preprocessing/cut_mix_demo.py b/examples/layers/preprocessing/cut_mix_demo.py index ab3262ce1e..bd00acb8fd 100644 --- a/examples/layers/preprocessing/cut_mix_demo.py +++ b/examples/layers/preprocessing/cut_mix_demo.py @@ -1,3 +1,16 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """cut_mix_demo.py shows how to use the CutMix preprocessing layer. Operates on the oxford_flowers102 dataset. In this script the flowers diff --git a/examples/layers/preprocessing/grid_mask_demo.py b/examples/layers/preprocessing/grid_mask_demo.py index 55691a2653..46718fdf52 100644 --- a/examples/layers/preprocessing/grid_mask_demo.py +++ b/examples/layers/preprocessing/grid_mask_demo.py @@ -1,3 +1,16 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """gridmask_demo.py shows how to use the GridMask preprocessing layer. Operates on the oxford_flowers102 dataset. In this script the flowers diff --git a/examples/layers/preprocessing/mix_up_demo.py b/examples/layers/preprocessing/mix_up_demo.py index a873b708da..8ff37c4361 100644 --- a/examples/layers/preprocessing/mix_up_demo.py +++ b/examples/layers/preprocessing/mix_up_demo.py @@ -1,3 +1,16 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """mix_up_demo.py shows how to use the MixUp preprocessing layer. Uses the oxford_flowers102 dataset. In this script the flowers diff --git a/examples/layers/preprocessing/random_color_degeneration_demo.py b/examples/layers/preprocessing/random_color_degeneration_demo.py new file mode 100644 index 0000000000..b3cee86839 --- /dev/null +++ b/examples/layers/preprocessing/random_color_degeneration_demo.py @@ -0,0 +1,62 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""random_color_degeneration_demo.py shows how to use RandomColorDegeneration. + +Operates on the oxford_flowers102 dataset. In this script the flowers +are loaded, then are passed through the preprocessing layers. +Finally, they are shown using matplotlib. +""" +import matplotlib.pyplot as plt +import tensorflow as tf +import tensorflow_datasets as tfds + +from keras_cv.layers import preprocessing + +IMG_SIZE = (224, 224) +BATCH_SIZE = 64 + + +def resize(image, label): + image = tf.image.resize(image, IMG_SIZE) + return image, label + + +def main(): + data, ds_info = tfds.load("oxford_flowers102", with_info=True, as_supervised=True) + train_ds = data["train"] + + train_ds = ( + train_ds.take(1) + .repeat() + .map(lambda x, y: resize(x, y)) + .shuffle(10 * BATCH_SIZE) + .batch(BATCH_SIZE) + ) + random_color_degeneration = preprocessing.RandomColorDegeneration(factor=(0, 1.0)) + train_ds = train_ds.map( + lambda x, y: (random_color_degeneration(x), y), + num_parallel_calls=tf.data.AUTOTUNE, + ) + + for images, labels in train_ds.take(1): + plt.figure(figsize=(8, 8)) + for i in range(9): + plt.subplot(3, 3, i + 1) + plt.imshow(images[i].numpy().astype("uint8")) + plt.axis("off") + plt.show() + + +if __name__ == "__main__": + main() diff --git a/examples/layers/preprocessing/random_cutout_demo.py b/examples/layers/preprocessing/random_cutout_demo.py index 91aab5f85b..b33d4bcf29 100644 --- a/examples/layers/preprocessing/random_cutout_demo.py +++ b/examples/layers/preprocessing/random_cutout_demo.py @@ -1,3 +1,16 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. """random_cutout_demo.py shows how to use the RandomCutout preprocessing layer. Operates on the oxford_flowers102 dataset. In this script the flowers diff --git a/examples/layers/preprocessing/random_shear_demo.py b/examples/layers/preprocessing/random_shear_demo.py new file mode 100644 index 0000000000..939eefc658 --- /dev/null +++ b/examples/layers/preprocessing/random_shear_demo.py @@ -0,0 +1,62 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""random_shear_demo.py shows how to use the RandomShear preprocessing layer. + +Operates on the oxford_flowers102 dataset. In this script the flowers +are loaded, then are passed through the preprocessing layers. +Finally, they are shown using matplotlib. +""" +import matplotlib.pyplot as plt +import tensorflow as tf +import tensorflow_datasets as tfds + +from keras_cv.layers import preprocessing + +IMG_SIZE = (224, 224) +BATCH_SIZE = 64 + + +def resize(image, label): + image = tf.image.resize(image, IMG_SIZE) + return image, label + + +def main(): + data, ds_info = tfds.load("oxford_flowers102", with_info=True, as_supervised=True) + train_ds = data["train"] + + train_ds = ( + train_ds.map(lambda x, y: resize(x, y)) + .shuffle(10 * BATCH_SIZE) + .batch(BATCH_SIZE) + ) + random_cutout = preprocessing.RandomShear( + x=(0, 1), + y=0.5, + ) + train_ds = train_ds.map( + lambda x, y: (random_cutout(x), y), num_parallel_calls=tf.data.AUTOTUNE + ) + + for images, labels in train_ds.take(1): + plt.figure(figsize=(8, 8)) + for i in range(9): + plt.subplot(3, 3, i + 1) + plt.imshow(images[i].numpy().astype("uint8")) + plt.axis("off") + plt.show() + + +if __name__ == "__main__": + main() diff --git a/keras_cv/__init__.py b/keras_cv/__init__.py index bef5afd909..ef221d43c0 100644 --- a/keras_cv/__init__.py +++ b/keras_cv/__init__.py @@ -15,3 +15,5 @@ from keras_cv import layers from keras_cv import metrics from keras_cv import utils + +__version__ = "master" diff --git a/keras_cv/layers/__init__.py b/keras_cv/layers/__init__.py index c52378b849..125aabdb6a 100644 --- a/keras_cv/layers/__init__.py +++ b/keras_cv/layers/__init__.py @@ -12,7 +12,32 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from tensorflow.keras.layers import CenterCrop
+from tensorflow.keras.layers import RandomBrightness
+from tensorflow.keras.layers import RandomContrast
+from tensorflow.keras.layers import RandomCrop
+from tensorflow.keras.layers import RandomFlip
+from tensorflow.keras.layers import RandomHeight
+from tensorflow.keras.layers import RandomRotation
+from tensorflow.keras.layers import RandomTranslation
+from tensorflow.keras.layers import RandomWidth
+from tensorflow.keras.layers import RandomZoom
+from tensorflow.keras.layers import Rescaling
+from tensorflow.keras.layers import Resizing
+
+from keras_cv.layers.preprocessing.auto_contrast import AutoContrast
+from keras_cv.layers.preprocessing.channel_shuffle import ChannelShuffle
 from keras_cv.layers.preprocessing.cut_mix import CutMix
+from keras_cv.layers.preprocessing.equalization import Equalization
+from keras_cv.layers.preprocessing.grayscale import Grayscale
+from keras_cv.layers.preprocessing.grid_mask import GridMask
 from keras_cv.layers.preprocessing.mix_up import MixUp
+from keras_cv.layers.preprocessing.posterization import Posterization
+from keras_cv.layers.preprocessing.random_color_degeneration import (
+    RandomColorDegeneration,
+)
 from keras_cv.layers.preprocessing.random_cutout import RandomCutout
+from keras_cv.layers.preprocessing.random_sharpness import RandomSharpness
+from keras_cv.layers.preprocessing.random_shear import RandomShear
 from keras_cv.layers.preprocessing.solarization import Solarization
+from keras_cv.layers.regularization.dropblock_2d import DropBlock2D
diff --git a/keras_cv/layers/preprocessing/__init__.py b/keras_cv/layers/preprocessing/__init__.py
index d3bc653855..f2617724c0 100644
--- a/keras_cv/layers/preprocessing/__init__.py
+++ b/keras_cv/layers/preprocessing/__init__.py
@@ -12,10 +12,33 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Also export the image KPLs from core keras, so that users can import all the image
+# KPLs from one place.
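The comment above re-exports the core Keras image KPLs next to the KerasCV ones. A minimal sketch of what this buys users, assuming a TF build that ships these layers and using only names imported in this module:

```python
import tensorflow as tf

from keras_cv.layers import preprocessing

# Core-Keras and KerasCV augmentation layers now share one namespace.
augmenter = tf.keras.Sequential(
    [
        preprocessing.RandomFlip("horizontal"),  # re-exported from tf.keras.layers
        preprocessing.RandomRotation(0.1),  # re-exported from tf.keras.layers
        preprocessing.ChannelShuffle(groups=3),  # defined in KerasCV
    ]
)

images = tf.random.uniform((8, 224, 224, 3), maxval=255.0)
augmented = augmenter(images, training=True)
```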
+from tensorflow.keras.layers import CenterCrop +from tensorflow.keras.layers import RandomBrightness +from tensorflow.keras.layers import RandomContrast +from tensorflow.keras.layers import RandomCrop +from tensorflow.keras.layers import RandomFlip +from tensorflow.keras.layers import RandomHeight +from tensorflow.keras.layers import RandomRotation +from tensorflow.keras.layers import RandomTranslation +from tensorflow.keras.layers import RandomWidth +from tensorflow.keras.layers import RandomZoom +from tensorflow.keras.layers import Rescaling +from tensorflow.keras.layers import Resizing + +from keras_cv.layers.preprocessing.auto_contrast import AutoContrast +from keras_cv.layers.preprocessing.channel_shuffle import ChannelShuffle from keras_cv.layers.preprocessing.cut_mix import CutMix from keras_cv.layers.preprocessing.equalization import Equalization from keras_cv.layers.preprocessing.grayscale import Grayscale from keras_cv.layers.preprocessing.grid_mask import GridMask from keras_cv.layers.preprocessing.mix_up import MixUp +from keras_cv.layers.preprocessing.posterization import Posterization +from keras_cv.layers.preprocessing.random_color_degeneration import ( + RandomColorDegeneration, +) from keras_cv.layers.preprocessing.random_cutout import RandomCutout +from keras_cv.layers.preprocessing.random_sharpness import RandomSharpness +from keras_cv.layers.preprocessing.random_shear import RandomShear from keras_cv.layers.preprocessing.solarization import Solarization diff --git a/keras_cv/layers/preprocessing/auto_contrast.py b/keras_cv/layers/preprocessing/auto_contrast.py new file mode 100644 index 0000000000..ff5f23391c --- /dev/null +++ b/keras_cv/layers/preprocessing/auto_contrast.py @@ -0,0 +1,62 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tensorflow as tf + +from keras_cv.utils import preprocessing + + +class AutoContrast(tf.keras.__internal__.layers.BaseImageAugmentationLayer): + """Performs the AutoContrast operation on an image. + + Auto contrast stretches the values of an image across the entire available + `value_range`. This makes differences between pixels more obvious. An example of + this is if an image only has values `[0, 1]` out of the range `[0, 255]`, auto + contrast will change the `1` values to be `255`. + + Args: + value_range: the range of values the incoming images will have. + Represented as a two number tuple written [low, high]. + This is typically either `[0, 1]` or `[0, 255]` depending + on how your preprocessing pipeline is setup. 
Defaults to + `[0, 255].` + """ + + def __init__( + self, + value_range=(0, 255), + **kwargs, + ): + super().__init__(**kwargs) + self.value_range = value_range + + def augment_image(self, image, transformation=None): + original_image = image + image = preprocessing.transform_value_range( + image, original_range=self.value_range, target_range=(0, 255) + ) + + low = tf.reduce_min(tf.reduce_min(image, axis=0), axis=0) + high = tf.reduce_max(tf.reduce_max(image, axis=0), axis=0) + scale = 255.0 / (high - low) + offset = -low * scale + + image = image * scale[None, None] + offset[None, None] + result = tf.clip_by_value(image, 0.0, 255.0) + result = preprocessing.transform_value_range( + result, original_range=(0, 255), target_range=self.value_range + ) + # don't process NaN channels + result = tf.where(tf.math.is_nan(result), original_image, result) + return result diff --git a/keras_cv/layers/preprocessing/auto_contrast_test.py b/keras_cv/layers/preprocessing/auto_contrast_test.py new file mode 100644 index 0000000000..cc9cc338b6 --- /dev/null +++ b/keras_cv/layers/preprocessing/auto_contrast_test.py @@ -0,0 +1,114 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tensorflow as tf + +from keras_cv.layers import preprocessing + + +class AutoContrastTest(tf.test.TestCase): + def test_constant_channels_dont_get_nanned(self): + img = tf.constant([1, 1], dtype=tf.float32) + img = tf.expand_dims(img, axis=-1) + img = tf.expand_dims(img, axis=-1) + img = tf.expand_dims(img, axis=0) + + layer = preprocessing.AutoContrast(value_range=(0, 255)) + ys = layer(img) + + self.assertTrue(tf.math.reduce_any(ys[0] == 1.0)) + self.assertTrue(tf.math.reduce_any(ys[0] == 1.0)) + + def test_auto_contrast_expands_value_range(self): + img = tf.constant([0, 128], dtype=tf.float32) + img = tf.expand_dims(img, axis=-1) + img = tf.expand_dims(img, axis=-1) + img = tf.expand_dims(img, axis=0) + + layer = preprocessing.AutoContrast(value_range=(0, 255)) + ys = layer(img) + + self.assertTrue(tf.math.reduce_any(ys[0] == 0.0)) + self.assertTrue(tf.math.reduce_any(ys[0] == 255.0)) + + def test_auto_contrast_different_values_per_channel(self): + img = tf.constant( + [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], dtype=tf.float32 + ) + img = tf.expand_dims(img, axis=0) + + layer = preprocessing.AutoContrast(value_range=(0, 255)) + ys = layer(img) + + self.assertTrue(tf.math.reduce_any(ys[0, ..., 0] == 0.0)) + self.assertTrue(tf.math.reduce_any(ys[0, ..., 1] == 0.0)) + + self.assertTrue(tf.math.reduce_any(ys[0, ..., 0] == 255.0)) + self.assertTrue(tf.math.reduce_any(ys[0, ..., 1] == 255.0)) + + self.assertAllClose( + ys, + [ + [ + [ + [ + 0.0, + 0.0, + 0.0, + ], + [ + 85.0, + 85.0, + 85.0, + ], + ], + [ + [ + 170.0, + 170.0, + 170.0, + ], + [ + 255.0, + 255.0, + 255.0, + ], + ], + ] + ], + ) + + def test_auto_contrast_expands_value_range_uint8(self): + img = tf.constant([0, 128], dtype=tf.uint8) + img = tf.expand_dims(img, axis=-1) + img = tf.expand_dims(img, axis=-1) + img = 
tf.expand_dims(img, axis=0)
+
+        layer = preprocessing.AutoContrast(value_range=(0, 255))
+        ys = layer(img)
+
+        self.assertTrue(tf.math.reduce_any(ys[0] == 0.0))
+        self.assertTrue(tf.math.reduce_any(ys[0] == 255.0))
+
+    def test_auto_contrast_properly_converts_value_range(self):
+        img = tf.constant([0, 0.5], dtype=tf.float32)
+        img = tf.expand_dims(img, axis=-1)
+        img = tf.expand_dims(img, axis=-1)
+        img = tf.expand_dims(img, axis=0)
+
+        layer = preprocessing.AutoContrast(value_range=(0, 1))
+        ys = layer(img)
+
+        self.assertTrue(tf.math.reduce_any(ys[0] == 0.0))
+        self.assertTrue(tf.math.reduce_any(ys[0] == 1.0))
diff --git a/keras_cv/layers/preprocessing/channel_shuffle.py b/keras_cv/layers/preprocessing/channel_shuffle.py
new file mode 100644
index 0000000000..b10b40d21a
--- /dev/null
+++ b/keras_cv/layers/preprocessing/channel_shuffle.py
@@ -0,0 +1,98 @@
+# Copyright 2022 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import tensorflow as tf
+from tensorflow.keras import layers
+
+
+class ChannelShuffle(layers.Layer):
+    """Shuffle channels of an input image.
+
+    Input shape:
+        Input images should have pixel values in the range [0, 255].
+        3D (unbatched) or 4D (batched) tensor with shape:
+        `(..., height, width, channels)`, in `"channels_last"` format
+
+    Output shape:
+        3D (unbatched) or 4D (batched) tensor with shape:
+        `(..., height, width, channels)`, in `"channels_last"` format
+
+    Args:
+        groups: Number of groups to divide the input channels. Defaults to 3.
+        seed: Integer. Used to create a random seed.
+
+    Call arguments:
+        images: Tensor representing images of shape
+            `(batch_size, height, width, channels)` or `(height, width, channels)`,
+            with dtype tf.float32 / tf.uint8
+        training: A boolean argument that determines whether the call should be run
+            in inference mode or training mode. Defaults to True.
+
+    Usage:
+    ```python
+    (images, labels), _ = tf.keras.datasets.cifar10.load_data()
+    channel_shuffle = keras_cv.layers.ChannelShuffle()
+    augmented_images = channel_shuffle(images)
+    ```
+    """
+
+    def __init__(self, groups=3, seed=None, **kwargs):
+        super().__init__(**kwargs)
+        self.groups = groups
+        self.seed = seed
+
+    def _channel_shuffling(self, images):
+        unbatched = images.shape.rank == 3
+
+        if unbatched:
+            images = tf.expand_dims(images, axis=0)
+
+        height = tf.shape(images)[1]
+        width = tf.shape(images)[2]
+        num_channels = images.shape[3]
+
+        if not num_channels % self.groups == 0:
+            raise ValueError(
+                "The number of input channels should be "
+                "divisible by the number of groups."
+ f"Received: channels={num_channels}, groups={self.groups}" + ) + + channels_per_group = num_channels // self.groups + images = tf.reshape( + images, [-1, height, width, self.groups, channels_per_group] + ) + images = tf.transpose(images, perm=[3, 1, 2, 4, 0]) + images = tf.random.shuffle(images, seed=self.seed) + images = tf.transpose(images, perm=[4, 1, 2, 3, 0]) + images = tf.reshape(images, [-1, height, width, num_channels]) + + if unbatched: + images = tf.squeeze(images, axis=0) + + return images + + def call(self, images, training=True): + if training: + return self._channel_shuffling(images) + else: + return images + + def get_config(self): + config = super().get_config() + config.update({"groups": self.groups, "seed": self.seed}) + return config + + def compute_output_shape(self, input_shape): + return input_shape diff --git a/keras_cv/layers/preprocessing/channel_shuffle_test.py b/keras_cv/layers/preprocessing/channel_shuffle_test.py new file mode 100644 index 0000000000..fc41dbcce0 --- /dev/null +++ b/keras_cv/layers/preprocessing/channel_shuffle_test.py @@ -0,0 +1,94 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tensorflow as tf + +from keras_cv.layers.preprocessing.channel_shuffle import ChannelShuffle + + +class ChannelShuffleTest(tf.test.TestCase): + def test_return_shapes(self): + xs = tf.ones((2, 512, 512, 3)) + + layer = ChannelShuffle(groups=3) + xs = layer(xs, training=True) + self.assertEqual(xs.shape, [2, 512, 512, 3]) + + def test_channel_shuffle_call_results_one_channel(self): + xs = tf.cast( + tf.stack( + [3 * tf.ones((40, 40, 1)), 2 * tf.ones((40, 40, 1))], + axis=0, + ), + dtype=tf.float32, + ) + + layer = ChannelShuffle(groups=1) + xs = layer(xs, training=True) + self.assertTrue(tf.math.reduce_any(xs[0] == 3.0)) + self.assertTrue(tf.math.reduce_any(xs[1] == 2.0)) + + def test_channel_shuffle_call_results_multi_channel(self): + xs = tf.cast( + tf.stack( + [3 * tf.ones((40, 40, 20)), 2 * tf.ones((40, 40, 20))], + axis=0, + ), + dtype=tf.float32, + ) + + layer = ChannelShuffle(groups=5) + xs = layer(xs, training=True) + self.assertTrue(tf.math.reduce_any(xs[0] == 3.0)) + self.assertTrue(tf.math.reduce_any(xs[1] == 2.0)) + + def test_non_square_image(self): + xs = tf.cast( + tf.stack( + [2 * tf.ones((1024, 512, 1)), tf.ones((1024, 512, 1))], + axis=0, + ), + dtype=tf.float32, + ) + + layer = ChannelShuffle(groups=1) + xs = layer(xs, training=True) + self.assertTrue(tf.math.reduce_any(xs[0] == 2.0)) + self.assertTrue(tf.math.reduce_any(xs[1] == 1.0)) + + def test_in_tf_function(self): + xs = tf.cast( + tf.stack([2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0), + dtype=tf.float32, + ) + + layer = ChannelShuffle(groups=1) + + @tf.function + def augment(x): + return layer(x, training=True) + + xs = augment(xs) + self.assertTrue(tf.math.reduce_any(xs[0] == 2.0)) + self.assertTrue(tf.math.reduce_any(xs[1] == 1.0)) + + def test_in_single_image(self): + xs = tf.cast( + tf.ones((512, 512, 1)), + dtype=tf.float32, 
+ ) + + layer = ChannelShuffle(groups=1) + xs = layer(xs, training=True) + self.assertTrue(tf.math.reduce_any(xs == 1.0)) diff --git a/keras_cv/layers/preprocessing/grayscale.py b/keras_cv/layers/preprocessing/grayscale.py index 6195678cac..4281544482 100644 --- a/keras_cv/layers/preprocessing/grayscale.py +++ b/keras_cv/layers/preprocessing/grayscale.py @@ -13,11 +13,9 @@ # limitations under the License. import tensorflow as tf -from tensorflow.keras import backend -from tensorflow.keras import layers -class Grayscale(layers.Layer): +class Grayscale(tf.keras.__internal__.layers.BaseImageAugmentationLayer): """Grayscale is a preprocessing layer that transforms RGB images to Grayscale images. Input images should have values in the range of [0, 255]. @@ -56,7 +54,7 @@ def _check_input_params(self, output_channels): ) self.output_channels = output_channels - def _rgb_to_grayscale(self, image): + def augment_image(self, image, transformation=None): grayscale = tf.image.rgb_to_grayscale(image) if self.output_channels == 1: return grayscale @@ -65,25 +63,6 @@ def _rgb_to_grayscale(self, image): else: raise ValueError("Unsupported value for `output_channels`.") - def call(self, images, training=None): - """call method for the ChannelShuffle layer. - Args: - images: Tensor representing images of shape - [batch_size, width, height, channels] or - [width, height, channels] with type float or int. - Pixel values should be in the range [0, 255] - Returns: - images: augmented images, same shape as input. - """ - if training is None: - training = backend.learning_phase() - - return tf.cond( - tf.cast(training, tf.bool), - lambda: self._rgb_to_grayscale(images), - lambda: images, - ) - def get_config(self): config = { "output_channels": self.output_channels, diff --git a/keras_cv/layers/preprocessing/posterization.py b/keras_cv/layers/preprocessing/posterization.py new file mode 100644 index 0000000000..46ec6dd5a0 --- /dev/null +++ b/keras_cv/layers/preprocessing/posterization.py @@ -0,0 +1,103 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import tensorflow as tf +from tensorflow.keras.__internal__.layers import BaseImageAugmentationLayer + +from keras_cv.utils.preprocessing import transform_value_range + + +class Posterization(BaseImageAugmentationLayer): + """Reduces the number of bits for each color channel. + + References: + - [AutoAugment: Learning Augmentation Policies from Data]( + https://arxiv.org/abs/1805.09501 + ) + - [RandAugment: Practical automated data augmentation with a reduced search space]( + https://arxiv.org/abs/1909.13719 + ) + + Args: + bits: integer. The number of bits to keep for each channel. Must be a value + between 1-8. + value_range: a tuple or a list of two elements. The first value represents + the lower bound for values in passed images, the second represents the + upper bound. Images passed to the layer should have values within + `value_range`. Defaults to `(0, 255)`. 
+
+    Usage:
+    ```python
+    (images, labels), _ = tf.keras.datasets.cifar10.load_data()
+    print(images[0, 0, 0])
+    # [59 62 63]
+    # Note that images are Tensors with values in the range [0, 255] and uint8 dtype
+    posterization = Posterization(bits=4, value_range=[0, 255])
+    images = posterization(images)
+    print(images[0, 0, 0])
+    # [48., 48., 48.]
+    # NOTE: the layer will output values in tf.float32, regardless of input dtype.
+    ```
+
+    Call arguments:
+        inputs: input tensor in two possible formats:
+            1. single 3D (HWC) image or 4D (NHWC) batch of images.
+            2. A dict of tensors where the images are under `"images"` key.
+    """
+
+    def __init__(self, bits: int, value_range=(0, 255), **kwargs):
+        super().__init__(**kwargs)
+
+        if not len(value_range) == 2:
+            raise ValueError(
+                "value_range must be a sequence of two elements. "
+                f"Received: {value_range}"
+            )
+
+        if not (0 < bits < 9):
+            raise ValueError(
+                f"Bits value must be between 1 and 8. Received bits: {bits}."
+            )
+
+        self._shift = 8 - bits
+        self._value_range = value_range
+
+    def augment_image(self, image, transformation=None):
+        image = transform_value_range(
+            images=image,
+            original_range=self._value_range,
+            target_range=[0, 255],
+        )
+        image = tf.cast(image, tf.uint8)
+
+        image = self._posterize(image)
+
+        image = tf.cast(image, self.compute_dtype)
+        return transform_value_range(
+            images=image,
+            original_range=[0, 255],
+            target_range=self._value_range,
+        )
+
+    def _batch_augment(self, inputs):
+        # Skip the use of vectorized_map or map_fn as the implementation is already
+        # vectorized
+        return self._augment(inputs)
+
+    def _posterize(self, image):
+        return tf.bitwise.left_shift(
+            tf.bitwise.right_shift(image, self._shift), self._shift
+        )
+
+    def get_config(self):
+        config = {"bits": 8 - self._shift, "value_range": self._value_range}
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
diff --git a/keras_cv/layers/preprocessing/posterization_test.py b/keras_cv/layers/preprocessing/posterization_test.py
new file mode 100644
index 0000000000..cc7cf9f6d8
--- /dev/null
+++ b/keras_cv/layers/preprocessing/posterization_test.py
@@ -0,0 +1,101 @@
+# Copyright 2022 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
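The `_posterize` helper above keeps only the top `bits` bits of each channel by shifting right and then left again. A quick numeric sketch of just that bit trick (values chosen to mirror the docstring example above):

```python
import tensorflow as tf

bits = 4
shift = 8 - bits  # zero out the low 4 bits of each 8-bit channel

pixels = tf.constant([59, 62, 63, 200], dtype=tf.uint8)
posterized = tf.bitwise.left_shift(tf.bitwise.right_shift(pixels, shift), shift)
print(posterized.numpy())  # [ 48  48  48 192]
```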
+import numpy as np +import tensorflow as tf + +from keras_cv.layers.preprocessing.posterization import Posterization + + +class PosterizationTest(tf.test.TestCase): + rng = tf.random.Generator.from_non_deterministic_state() + + def test_raises_error_on_invalid_bits_parameter(self): + invalid_values = [-1, 0, 9, 24] + for value in invalid_values: + with self.assertRaises(ValueError): + Posterization(bits=value, value_range=[0, 1]) + + def test_raises_error_on_invalid_value_range(self): + invalid_ranges = [(1,), [1, 2, 3]] + for value_range in invalid_ranges: + with self.assertRaises(ValueError): + Posterization(bits=1, value_range=value_range) + + def test_single_image(self): + bits = self._get_random_bits() + dummy_input = self.rng.uniform(shape=(224, 224, 3), maxval=256) + expected_output = self._calc_expected_output(dummy_input, bits=bits) + + layer = Posterization(bits=bits, value_range=[0, 255]) + output = layer(dummy_input) + + self.assertAllEqual(output, expected_output) + + def _get_random_bits(self): + return int(self.rng.uniform(shape=(), minval=1, maxval=9, dtype=tf.int32)) + + def test_single_image_rescaled(self): + bits = self._get_random_bits() + dummy_input = self.rng.uniform(shape=(224, 224, 3), maxval=1.0) + expected_output = self._calc_expected_output(dummy_input * 255, bits=bits) / 255 + + layer = Posterization(bits=bits, value_range=[0, 1]) + output = layer(dummy_input) + + self.assertAllClose(output, expected_output) + + def test_batched_input(self): + bits = self._get_random_bits() + dummy_input = self.rng.uniform(shape=(2, 224, 224, 3), maxval=256) + + expected_output = [] + for image in dummy_input: + expected_output.append(self._calc_expected_output(image, bits=bits)) + expected_output = tf.stack(expected_output) + + layer = Posterization(bits=bits, value_range=[0, 255]) + output = layer(dummy_input) + + self.assertAllEqual(output, expected_output) + + def test_works_with_xla(self): + dummy_input = self.rng.uniform(shape=(2, 224, 224, 3)) + layer = Posterization(bits=4, value_range=[0, 1]) + + @tf.function(jit_compile=True) + def apply(x): + return layer(x) + + apply(dummy_input) + + @staticmethod + def _calc_expected_output(image, bits): + """Posterization in numpy, based on Albumentations: + + The algorithm is basically: + 1. create a lookup table of all possible input pixel values to pixel values + after posterize + 2. map each pixel in the input to created lookup table. + + Source: + https://github.com/albumentations-team/albumentations/blob/89a675cbfb2b76f6be90e7049cd5211cb08169a5/albumentations/augmentations/functional.py#L407 + """ + dtype = image.dtype + image = tf.cast(image, tf.uint8) + + lookup_table = np.arange(0, 256, dtype=np.uint8) + mask = ~np.uint8(2 ** (8 - bits) - 1) + lookup_table &= mask + + return tf.cast(lookup_table[image], dtype) diff --git a/keras_cv/layers/preprocessing/random_color_degeneration.py b/keras_cv/layers/preprocessing/random_color_degeneration.py new file mode 100644 index 0000000000..4af513087b --- /dev/null +++ b/keras_cv/layers/preprocessing/random_color_degeneration.py @@ -0,0 +1,58 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import tensorflow as tf
+
+from keras_cv.utils import preprocessing
+
+
+class RandomColorDegeneration(tf.keras.__internal__.layers.BaseImageAugmentationLayer):
+    """Randomly performs the color degeneration operation on given images.
+
+    The color degeneration operation first converts an image to gray scale, then
+    back to color. It then takes a weighted average between the original image and
+    the degenerated image. This makes colors appear more dull.
+
+    Args:
+        factor: Either a tuple of two floats or a single float. `factor` controls the
+            extent to which the image colors are degenerated. `factor=0.0` makes this
+            layer perform a no-op operation, while a value of 1.0 uses the degenerated
+            result entirely. Values between 0 and 1 result in linear interpolation
+            between the original image and the degenerated image.
+
+            Values should be between `0.0` and `1.0`. If a tuple is used, a `factor` is
+            sampled between the two values for every image augmented. If a single float
+            is used, a value between `0.0` and the passed float is sampled. In order to
+            ensure the value is always the same, please pass a tuple with two identical
+            floats: `(0.5, 0.5)`.
+    """
+
+    def __init__(
+        self,
+        factor,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.factor = preprocessing.parse_factor_value_range(factor)
+
+    def get_random_transformation(self):
+        if self.factor[0] == self.factor[1]:
+            return self.factor[0]
+        return self._random_generator.random_uniform(
+            (), self.factor[0], self.factor[1], dtype=tf.float32
+        )
+
+    def augment_image(self, image, transformation=None):
+        degenerate = tf.image.grayscale_to_rgb(tf.image.rgb_to_grayscale(image))
+        result = preprocessing.blend(image, degenerate, transformation)
+        return result
diff --git a/keras_cv/layers/preprocessing/random_color_degeneration_test.py b/keras_cv/layers/preprocessing/random_color_degeneration_test.py
new file mode 100644
index 0000000000..16fa93e77a
--- /dev/null
+++ b/keras_cv/layers/preprocessing/random_color_degeneration_test.py
@@ -0,0 +1,72 @@
+# Copyright 2022 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
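`augment_image` above delegates the mixing to `preprocessing.blend`. Assuming `blend` is a plain linear interpolation, which is what the docstring's "linear interpolation" wording and the tests below rely on, the layer's per-image effect reduces to this sketch:

```python
import tensorflow as tf

def color_degenerate(image, factor):
    # Grayscale copy broadcast back to three channels (standard luma weights),
    # then interpolate `factor` of the way toward it.
    degenerate = tf.image.grayscale_to_rgb(tf.image.rgb_to_grayscale(image))
    return image + factor * (degenerate - image)

image = tf.random.uniform((224, 224, 3), maxval=255.0)
dull = color_degenerate(image, factor=0.7)  # 70% of the way to grayscale
```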
+import tensorflow as tf + +from keras_cv.layers import preprocessing + + +class RandomColorDegenerationTest(tf.test.TestCase): + def test_random_color_degeneration_base_case(self): + img_shape = (50, 50, 3) + xs = tf.stack( + [2 * tf.ones(img_shape), tf.ones(img_shape)], + axis=0, + ) + + layer = preprocessing.RandomColorDegeneration(0.0) + ys = layer(xs) + + self.assertEqual(xs.shape, ys.shape) + + def test_color_degeneration_full_factor(self): + img_shape = (50, 50, 1) + r = tf.ones(img_shape) + g = 2 * tf.ones(img_shape) + b = 3 * tf.ones(img_shape) + xs = tf.concat([r, g, b], axis=-1) + + layer = preprocessing.RandomColorDegeneration(factor=(1, 1)) + ys = layer(xs) + + # Color degeneration uses standard luma conversion for RGB->Grayscale. + # The formula for luma is result= 0.2989*r + 0.5870*g + 0.1140*b + luma_result = 0.2989 + 2 * 0.5870 + 3 * 0.1140 + self.assertAllClose(ys, tf.ones_like(ys) * luma_result) + + def test_color_degeneration_70p_factor(self): + img_shape = (50, 50, 1) + r = tf.ones(img_shape) + g = 2 * tf.ones(img_shape) + b = 3 * tf.ones(img_shape) + xs = tf.concat([r, g, b], axis=-1) + + layer = preprocessing.RandomColorDegeneration(factor=(0.7, 0.7)) + ys = layer(xs) + + # Color degeneration uses standard luma conversion for RGB->Grayscale. + # The formula for luma is result= 0.2989*r + 0.5870*g + 0.1140*b + luma_result = 0.2989 + 2 * 0.5870 + 3 * 0.1140 + + # with factor=0.7, luma_result should be blended at a 70% rate with the original + r_result = luma_result * 0.7 + 1 * 0.3 + g_result = luma_result * 0.7 + 2 * 0.3 + b_result = luma_result * 0.7 + 3 * 0.3 + + r = ys[..., 0] + g = ys[..., 1] + b = ys[..., 2] + + self.assertAllClose(r, tf.ones_like(r) * r_result) + self.assertAllClose(g, tf.ones_like(g) * g_result) + self.assertAllClose(b, tf.ones_like(b) * b_result) diff --git a/keras_cv/layers/preprocessing/random_cutout.py b/keras_cv/layers/preprocessing/random_cutout.py index 7944022ceb..7baa1baaef 100644 --- a/keras_cv/layers/preprocessing/random_cutout.py +++ b/keras_cv/layers/preprocessing/random_cutout.py @@ -46,6 +46,12 @@ class RandomCutout(layers.Layer): - *gaussian_noise*: Pixels are filled with random gaussian noise. fill_value: a float represents the value to be filled inside the patches when `fill_mode="constant"`. + num_cutouts: One of: + - An integer representing the absolute number of cutouts + - A tuple of size 2, representing the range for the number of cutouts. + For example, `num_cutouts=10` results in 10 cutouts. + `num_cutouts=(2,8)` results in num_cutouts between [2, 8]. Can be used + to implement coarse dropout. Defaults to 1. 
 Sample usage:
 ```python
@@ -61,6 +67,7 @@ def __init__(
         width_factor,
         fill_mode="constant",
         fill_value=0.0,
+        num_cutouts=1,
         seed=None,
         **kwargs,
     ):
@@ -68,6 +75,11 @@
 
         self.height_lower, self.height_upper = self._parse_bounds(height_factor)
         self.width_lower, self.width_upper = self._parse_bounds(width_factor)
+        self.num_cutouts = num_cutouts
+        self.num_cutouts_lower, self.num_cutouts_upper = self._parse_bounds(num_cutouts)
+        self.num_cutouts_lower = (
+            self.num_cutouts_lower if self.num_cutouts_lower != 0 else 1
+        )
+        self.num_cutouts_upper += 1
 
         if fill_mode not in ["gaussian_noise", "constant"]:
             raise ValueError(
@@ -89,6 +101,13 @@
                     type(self.width_lower), type(self.width_upper)
                 )
             )
+        if not isinstance(self.num_cutouts_lower, type(self.num_cutouts_upper)):
+            raise ValueError(
+                "`num_cutouts` must have lower bound and upper bound "
+                "with same type, got {} and {}".format(
+                    type(self.num_cutouts_lower), type(self.num_cutouts_upper)
+                )
+            )
 
         if self.height_upper < self.height_lower:
             raise ValueError(
@@ -116,6 +135,23 @@
                 "when is float, got {}".format(width_factor)
             )
 
+        if self.num_cutouts_upper < self.num_cutouts_lower:
+            raise ValueError(
+                "`num_cutouts` cannot have upper bound less than lower bound"
+            )
+        if not isinstance(self.num_cutouts_upper, int):
+            raise ValueError(
+                "`num_cutouts` must be dtype int, got {}".format(
+                    type(self.num_cutouts_upper)
+                )
+            )
+        if not isinstance(self.num_cutouts_lower, int):
+            raise ValueError(
+                "`num_cutouts` must be dtype int, got {}".format(
+                    type(self.num_cutouts_lower)
+                )
+            )
+
         self.fill_mode = fill_mode
         self.fill_value = fill_value
         self.seed = seed
@@ -136,17 +172,18 @@ def call(self, inputs, training=True):
 
     def _random_cutout(self, inputs):
         """Apply random cutout."""
-        center_x, center_y = self._compute_rectangle_position(inputs)
-        rectangle_height, rectangle_width = self._compute_rectangle_size(inputs)
-        rectangle_fill = self._compute_rectangle_fill(inputs)
-        inputs = fill_utils.fill_rectangle(
-            inputs,
-            center_x,
-            center_y,
-            rectangle_width,
-            rectangle_height,
-            rectangle_fill,
-        )
+        for _ in tf.range(self._sample_num_cutouts()):
+            center_x, center_y = self._compute_rectangle_position(inputs)
+            rectangle_height, rectangle_width = self._compute_rectangle_size(inputs)
+            rectangle_fill = self._compute_rectangle_fill(inputs)
+            inputs = fill_utils.fill_rectangle(
+                inputs,
+                center_x,
+                center_y,
+                rectangle_width,
+                rectangle_height,
+                rectangle_fill,
+            )
         return inputs
 
     def _compute_rectangle_position(self, inputs):
@@ -172,6 +209,16 @@
         )
         return center_x, center_y
 
+    def _sample_num_cutouts(self):
+        num_cutouts = tf.random.uniform(
+            shape=(1,),
+            minval=self.num_cutouts_lower,
+            maxval=self.num_cutouts_upper,
+            dtype=tf.int32,
+            seed=self.seed,
+        )
+        return num_cutouts[0]
+
     def _compute_rectangle_size(self, inputs):
         input_shape = tf.shape(inputs)
         batch_size, image_height, image_width = (
@@ -222,6 +269,7 @@ def get_config(self):
             "width_factor": self.width_factor,
             "fill_mode": self.fill_mode,
             "fill_value": self.fill_value,
+            "num_cutouts": self.num_cutouts,
             "seed": self.seed,
         }
         base_config = super().get_config()
diff --git a/keras_cv/layers/preprocessing/random_cutout_test.py b/keras_cv/layers/preprocessing/random_cutout_test.py
index bc2621fc74..f31307e2db 100644
--- a/keras_cv/layers/preprocessing/random_cutout_test.py
+++ b/keras_cv/layers/preprocessing/random_cutout_test.py
@@ -15,8 +15,6 @@
 
 from keras_cv.layers import preprocessing
 
-NUM_CLASSES = 10
-
 
 class RandomCutoutTest(tf.test.TestCase):
     def _run_test(self, height_factor, width_factor):
diff --git a/keras_cv/layers/preprocessing/random_sharpness.py b/keras_cv/layers/preprocessing/random_sharpness.py
new file mode 100644
index 0000000000..7fc02dc54a
--- /dev/null
+++ b/keras_cv/layers/preprocessing/random_sharpness.py
@@ -0,0 +1,112 @@
+# Copyright 2022 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import tensorflow as tf
+
+from keras_cv.utils import preprocessing
+
+
+class RandomSharpness(tf.keras.__internal__.layers.BaseImageAugmentationLayer):
+    """Randomly performs the sharpness operation on given images.
+
+    The sharpness operation first performs a blur operation, then blends between the
+    original image and the blurred image. This operation makes the edges of an image
+    less sharp than they were in the original image.
+
+    References:
+    - [PIL](https://pillow.readthedocs.io/en/stable/reference/ImageEnhance.html)
+
+    Args:
+        factor: Either a tuple of two floats or a single float. `factor` controls the
+            extent to which the image sharpness is impacted. `factor=0.0` makes this
+            layer perform a no-op operation, while a value of 1.0 uses the sharpened
+            result entirely. Values between 0 and 1 result in linear interpolation
+            between the original image and the sharpened image.
+
+            Values should be between `0.0` and `1.0`. If a tuple is used, a `factor` is
+            sampled between the two values for every image augmented. If a single float
+            is used, a value between `0.0` and the passed float is sampled. In order to
+            ensure the value is always the same, please pass a tuple with two identical
+            floats: `(0.5, 0.5)`.
+        value_range: the range of values the incoming images will have.
+            Represented as a two number tuple written [low, high].
+            This is typically either `[0, 1]` or `[0, 255]` depending
+            on how your preprocessing pipeline is set up. Defaults to
+            `[0, 255]`.
+    """
+
+    def __init__(
+        self,
+        factor,
+        value_range=(0, 255),
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.value_range = value_range
+        self.factor = preprocessing.parse_factor_value_range(factor)
+
+    def get_random_transformation(self):
+        if self.factor[0] == self.factor[1]:
+            return self.factor[0]
+        return self._random_generator.random_uniform(
+            (), self.factor[0], self.factor[1], dtype=tf.float32
+        )
+
+    def augment_image(self, image, transformation=None):
+        image = preprocessing.transform_value_range(
+            image, original_range=self.value_range, target_range=(0, 255)
+        )
+        original_image = image
+
+        # Make image 4D for conv operation.
+        image = tf.expand_dims(image, axis=0)
+
+        # [1 1 1]
+        # [1 5 1]
+        # [1 1 1]
+        # all divided by 13 is the 3x3 smoothing kernel PIL uses for this operation.
+        # Correlating or convolving with this filter approximates a gaussian blur.
+        kernel = (
+            tf.constant(
+                [[1, 1, 1], [1, 5, 1], [1, 1, 1]], dtype=tf.float32, shape=[3, 3, 1, 1]
+            )
+            / 13.0
+        )
+
+        # Tile across channel dimension.
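+        # (depthwise_conv2d expects a kernel of shape
+        # [filter_height, filter_width, in_channels, channel_multiplier], so the
+        # single-channel kernel is replicated once per channel and each channel
+        # is blurred independently.)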
+ channels = tf.shape(image)[-1] + kernel = tf.tile(kernel, [1, 1, channels, 1]) + strides = [1, 1, 1, 1] + + smoothed_image = tf.nn.depthwise_conv2d( + image, kernel, strides, padding="VALID", dilations=[1, 1] + ) + smoothed_image = tf.clip_by_value(smoothed_image, 0.0, 255.0) + smoothed_image = tf.squeeze(smoothed_image, axis=0) + + # For the borders of the resulting image, fill in the values of the + # original image. + mask = tf.ones_like(smoothed_image) + padded_mask = tf.pad(mask, [[1, 1], [1, 1], [0, 0]]) + padded_smoothed_image = tf.pad(smoothed_image, [[1, 1], [1, 1], [0, 0]]) + + result = tf.where( + tf.equal(padded_mask, 1), padded_smoothed_image, original_image + ) + # Blend the final result. + result = preprocessing.blend(original_image, result, transformation) + result = preprocessing.transform_value_range( + result, original_range=(0, 255), target_range=self.value_range + ) + return result diff --git a/keras_cv/layers/preprocessing/random_sharpness_test.py b/keras_cv/layers/preprocessing/random_sharpness_test.py new file mode 100644 index 0000000000..ed5ff8441e --- /dev/null +++ b/keras_cv/layers/preprocessing/random_sharpness_test.py @@ -0,0 +1,71 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import tensorflow as tf + +from keras_cv.layers import preprocessing + + +class RandomSharpnessTest(tf.test.TestCase): + def test_random_sharpness_preserves_output_shape(self): + img_shape = (50, 50, 3) + xs = tf.stack( + [2 * tf.ones(img_shape), tf.ones(img_shape)], + axis=0, + ) + + layer = preprocessing.RandomSharpness(0.0) + ys = layer(xs) + + self.assertEqual(xs.shape, ys.shape) + self.assertAllClose(xs, ys) + + def test_random_sharpness_blur_effect_single_channel(self): + xs = tf.expand_dims( + tf.constant( + [ + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + ] + ), + axis=-1, + ) + xs = tf.expand_dims(xs, axis=0) + + layer = preprocessing.RandomSharpness((1.0, 1.0)) + ys = layer(xs) + + self.assertEqual(xs.shape, ys.shape) + + result = tf.expand_dims( + tf.constant( + [ + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 1 / 13, 1 / 13, 1 / 13, 0, 0], + [0, 0, 1 / 13, 5 / 13, 1 / 13, 0, 0], + [0, 0, 1 / 13, 1 / 13, 1 / 13, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + ] + ), + axis=-1, + ) + result = tf.expand_dims(result, axis=0) + + self.assertAllClose(ys, result) diff --git a/keras_cv/layers/preprocessing/random_shear.py b/keras_cv/layers/preprocessing/random_shear.py new file mode 100644 index 0000000000..8fd643c1d2 --- /dev/null +++ b/keras_cv/layers/preprocessing/random_shear.py @@ -0,0 +1,123 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import warnings + +import tensorflow as tf + +from keras_cv.utils import preprocessing + + +class RandomShear(tf.keras.__internal__.layers.BaseImageAugmentationLayer): + """Randomly shears an image. + + Args: + x: float, 2 element tuple, or `None`. For each augmented image a value is + sampled from the provided range. If a float is passed, the range is + interpreted as `(0, x)`. Values represent a percentage of the image + to shear over. For example, 0.3 shears pixels up to 30% of the way + across the image. All provided values should be positive. If + `None` is passed, no shear occurs on the X axis. Defaults to `None`. + y: float, 2 element tuple, or `None`. For each augmented image a value is + sampled from the provided range. If a float is passed, the range is + interpreted as `(0, y)`. Values represent a percentage of the image + to shear over. For example, 0.3 shears pixels up to 30% of the way + across the image. All provided values should be positive. If + `None` is passed, no shear occurs on the Y axis. Defaults to `None`. + interpolation: interpolation method used in the `ImageProjectiveTransformV3` op. + Supported values are `"nearest"` and `"bilinear"`. + Defaults to `"bilinear"`. + fill_mode: fill_mode in the `ImageProjectiveTransformV3` op. + Supported values are `"reflect"`, `"wrap"`, `"constant"`, and `"nearest"`. + Defaults to `"reflect"`. + fill_value: fill_value in the `ImageProjectiveTransformV3` op. + A `Tensor` of type `float32`. The value to be filled when fill_mode is + constant". Defaults to `0.0`. + """ + + def __init__( + self, + x=None, + y=None, + interpolation="bilinear", + fill_mode="reflect", + fill_value=0.0, + **kwargs, + ): + super().__init__(**kwargs) + if isinstance(x, float): + x = (0, x) + if isinstance(y, float): + y = (0, y) + if x is None and y is None: + warnings.warn( + "RandomShear received both `x=None` and `y=None`. As a " + "result, the layer will perform no augmentation." 
+ ) + self.x = x + self.y = y + self.interpolation = interpolation + self.fill_mode = fill_mode + self.fill_value = fill_value + + def get_random_transformation(self): + x = self._get_shear_amount(self.x) + y = self._get_shear_amount(self.y) + return (x, y) + + def _get_shear_amount(self, constraint): + if constraint is None: + return None + + negate = self._random_generator.random_uniform((), 0, 1, dtype=tf.float32) > 0.5 + negate = tf.cond(negate, lambda: -1.0, lambda: 1.0) + + return negate * self._random_generator.random_uniform( + (), constraint[0], constraint[1] + ) + + def augment_image(self, image, transformation=None): + image = tf.expand_dims(image, axis=0) + + x, y = transformation + + if x is not None: + transform_x = RandomShear._format_transform( + [1.0, x, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0] + ) + image = preprocessing.transform( + images=image, + transforms=transform_x, + interpolation=self.interpolation, + fill_mode=self.fill_mode, + fill_value=self.fill_value, + ) + + if y is not None: + transform_y = RandomShear._format_transform( + [1.0, 0.0, 0.0, y, 1.0, 0.0, 0.0, 0.0] + ) + image = preprocessing.transform( + images=image, + transforms=transform_y, + interpolation=self.interpolation, + fill_mode=self.fill_mode, + fill_value=self.fill_value, + ) + + return tf.squeeze(image, axis=0) + + @staticmethod + def _format_transform(transform): + transform = tf.convert_to_tensor(transform, dtype=tf.float32) + return transform[tf.newaxis] diff --git a/keras_cv/layers/preprocessing/random_shear_test.py b/keras_cv/layers/preprocessing/random_shear_test.py new file mode 100644 index 0000000000..960e00ec94 --- /dev/null +++ b/keras_cv/layers/preprocessing/random_shear_test.py @@ -0,0 +1,38 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import tensorflow as tf + +from keras_cv.layers import preprocessing + + +class RandomShearTest(tf.test.TestCase): + def test_aggressive_shear_fills_at_least_some_pixels(self): + img_shape = (50, 50, 3) + xs = tf.stack( + [2 * tf.ones(img_shape), tf.ones(img_shape)], + axis=0, + ) + xs = tf.cast(xs, tf.float32) + + fill_value = 0.0 + layer = preprocessing.RandomShear( + x=(3, 3), seed=0, fill_mode="constant", fill_value=fill_value + ) + xs = layer(xs) + + # Some pixels should be replaced with fill value + self.assertTrue(tf.math.reduce_any(xs[0] == fill_value)) + self.assertTrue(tf.math.reduce_any(xs[0] == 2.0)) + self.assertTrue(tf.math.reduce_any(xs[1] == fill_value)) + self.assertTrue(tf.math.reduce_any(xs[1] == 1.0)) diff --git a/keras_cv/layers/preprocessing/solarization.py b/keras_cv/layers/preprocessing/solarization.py index 860cfbd64a..eaee108177 100644 --- a/keras_cv/layers/preprocessing/solarization.py +++ b/keras_cv/layers/preprocessing/solarization.py @@ -13,8 +13,10 @@ # limitations under the License. 
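`augment_image` above packs each shear into the 8-element transform vector consumed by the `ImageProjectiveTransformV3` op named in the docstring. A standalone sketch of the same x-shear against the raw op (parameter layout per TensorFlow's convention; `shear=0.5` is an arbitrary choice):

```python
import tensorflow as tf

shear = 0.5
# [a0, a1, a2, b0, b1, b2, c0, c1]: each output pixel (x, y) samples the input
# at (a0*x + a1*y + a2, b0*x + b1*y + b2); c0 and c1 stay 0 for affine shears.
transform = tf.constant([[1.0, shear, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]])

images = tf.random.uniform((1, 64, 64, 3))
sheared = tf.raw_ops.ImageProjectiveTransformV3(
    images=images,
    transforms=transform,
    output_shape=tf.shape(images)[1:3],
    fill_value=0.0,
    interpolation="BILINEAR",
    fill_mode="REFLECT",
)
```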
 import tensorflow as tf
 
+from keras_cv.utils import preprocessing
 
-class Solarization(tf.keras.layers.Layer):
+
+class Solarization(tf.keras.__internal__.layers.BaseImageAugmentationLayer):
     """Applies (max_value - pixel + min_value) for each pixel in the image.
 
     When created without `threshold` parameter, the layer performs solarization to
@@ -25,22 +27,29 @@ class Solarization(tf.keras.layers.Layer):
     - [AutoAugment: Learning Augmentation Policies from Data](
         https://arxiv.org/abs/1805.09501
     )
+    - [RandAugment](https://arxiv.org/pdf/1909.13719.pdf)
 
     Args:
-        threshold: (Optionally) int or float. If specified, only pixel values above this
+        addition: (Optional) int or float. If specified, this value is added to each
+            pixel before solarization and thresholding. The addition value should be
+            scaled according to the value range (0, 255). Defaults to 0.0.
+        threshold: (Optional) int or float. If specified, only pixel values above this
             threshold will be solarized.
+        value_range: a tuple or a list of two elements. The first value represents
+            the lower bound for values in passed images, the second represents the
+            upper bound. Images passed to the layer should have values within
+            `value_range`. Defaults to `(0, 255)`.
 
     Usage:
-    ```python
-    (images, labels), _ = tf.keras.datasets.cifar10.load_data()
-    print(images[0, 0, 0])
-    # [59 62 63]
-    # Note that images are Tensor with values in the range [0, 255]
-    solarization = Solarization()
-    images = solarization(images)
-    print(images[0, 0, 0])
-    # [196, 193, 192]
+    ```python
+    (images, labels), _ = tf.keras.datasets.cifar10.load_data()
+    print(images[0, 0, 0])
+    # [59 62 63]
+    # Note that images are Tensor with values in the range [0, 255]
+    solarization = Solarization()
+    images = solarization(images)
+    print(images[0, 0, 0])
+    # [196, 193, 192]
     ```
 
     Call arguments:
@@ -49,24 +58,37 @@ class Solarization(tf.keras.layers.Layer):
         or [height, width, channels].
""" - def __init__(self, threshold=None): + def __init__( + self, + addition=0.0, + threshold=0.0, + value_range=(0, 255), + ): super().__init__() + self.addition = addition self.threshold = threshold + self.value_range = value_range - def call(self, images): - images = tf.clip_by_value(images, clip_value_min=0, clip_value_max=255) - if self.threshold is None: - return self._solarize(images) - else: - return self._solarize_above_threshold(images) + def augment_image(self, image, transformation=None): + image = preprocessing.transform_value_range( + image, original_range=self.value_range, target_range=(0, 255) + ) + result = image + self.addition + result = tf.clip_by_value(result, 0, 255) + result = self._solarize(result) + result = preprocessing.transform_value_range( + result, original_range=(0, 255), target_range=self.value_range + ) + return result def _solarize(self, images): - return 255 - images - - def _solarize_above_threshold(self, images): - return tf.where(images < self.threshold, images, self._solarize(images)) + return tf.where(images < self.threshold, images, 255 - images) def get_config(self): - config = {"threshold": self.threshold} + config = { + "threshold": self.threshold, + "addition": self.addition, + "value_range": self.value_range, + } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) diff --git a/keras_cv/layers/preprocessing/solarization_test.py b/keras_cv/layers/preprocessing/solarization_test.py index dc62de5c2f..2e81757306 100644 --- a/keras_cv/layers/preprocessing/solarization_test.py +++ b/keras_cv/layers/preprocessing/solarization_test.py @@ -12,72 +12,71 @@ # See the License for the specific language governing permissions and # limitations under the License. import tensorflow as tf +from absl.testing import parameterized from keras_cv.layers.preprocessing.solarization import Solarization -class SolarizationTest(tf.test.TestCase): - def test_range_0_to_255(self): +class SolarizationTest(tf.test.TestCase, parameterized.TestCase): + @parameterized.named_parameters( + ("0_255", 0, 255), + ("64_191", 64, 191), + ("127_128", 127, 128), + ("191_64", 191, 64), + ("255_0", 255, 0), + ) + def test_output_values(self, input_value, expected_value): solarization = Solarization() - test_parameters = [ - {"input_value": 0, "expected_output_value": 255}, - {"input_value": 64, "expected_output_value": 191}, - {"input_value": 127, "expected_output_value": 128}, - {"input_value": 191, "expected_output_value": 64}, - {"input_value": 255, "expected_output_value": 0}, - ] + self._test_input_output( + layer=solarization, + input_value=input_value, + expected_value=expected_value, + dtype=tf.uint8, + ) - for parameters in test_parameters: - self._test_input_output( - layer=solarization, - input_value=parameters["input_value"], - expected_value=parameters["expected_output_value"], - dtype=tf.uint8, - ) + @parameterized.named_parameters( + ("0_245", 0, 245), + ("255_0", 255, 0), + ) + def test_solarization_with_addition(self, input_value, output_value): + solarization = Solarization(addition=10.0) - @staticmethod - def _test_input_output(layer, input_value, expected_value, dtype): - dummy_input = tf.ones(shape=(2, 224, 224, 3), dtype=dtype) * input_value - expected_output = tf.ones(shape=(2, 224, 224, 3), dtype=dtype) * expected_value + self._test_input_output( + layer=solarization, + input_value=input_value, + expected_value=output_value, + dtype=tf.float32, + ) - output = layer(dummy_input) - - tf.debugging.assert_equal(output, 
expected_output) - - def test_only_values_above_threshold_are_solarized_if_threshold_specified(self): + @parameterized.named_parameters( + ("0_0", 0, 0), + ("64_64", 64, 64), + ("127_127", 127, 127), + ("191_64", 191, 64), + ("255_0", 255, 0), + ) + def test_only_values_above_threshold_are_solarized(self, input_value, output_value): solarization = Solarization(threshold=128) - test_parameters = [ - {"input_value": 0, "expected_output_value": 0}, - {"input_value": 64, "expected_output_value": 64}, - {"input_value": 127, "expected_output_value": 127}, - {"input_value": 191, "expected_output_value": 64}, - {"input_value": 255, "expected_output_value": 0}, - ] + self._test_input_output( + layer=solarization, + input_value=input_value, + expected_value=output_value, + dtype=tf.uint8, + ) - for parameters in test_parameters: - self._test_input_output( - layer=solarization, - input_value=parameters["input_value"], - expected_value=parameters["expected_output_value"], - dtype=tf.uint8, - ) - - def test_input_values_outside_of_specified_range_are_clipped(self): - solarization = Solarization() + def _test_input_output(self, layer, input_value, expected_value, dtype): + input = tf.ones(shape=(2, 224, 224, 3), dtype=dtype) * input_value + expected_output = tf.clip_by_value( + ( + tf.ones(shape=(2, 224, 224, 3), dtype=layer.compute_dtype) + * expected_value + ), + 0, + 255, + ) - test_parameters = [ - {"input_value": -100, "expected_output_value": 255}, # Clipped to 0 - {"input_value": -1, "expected_output_value": 255}, # Clipped to 0 - {"input_value": 256, "expected_output_value": 0}, # Clipped to 255 - {"input_value": 300, "expected_output_value": 0}, # Clipped to 255 - ] + output = layer(input) - for parameters in test_parameters: - self._test_input_output( - layer=solarization, - input_value=parameters["input_value"], - expected_value=parameters["expected_output_value"], - dtype=tf.int32, - ) + self.assertAllClose(output, expected_output) diff --git a/keras_cv/layers/regularization/__init__.py b/keras_cv/layers/regularization/__init__.py new file mode 100644 index 0000000000..235ddf64bd --- /dev/null +++ b/keras_cv/layers/regularization/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from keras_cv.layers.regularization.dropblock_2d import DropBlock2D diff --git a/keras_cv/layers/regularization/dropblock_2d.py b/keras_cv/layers/regularization/dropblock_2d.py new file mode 100644 index 0000000000..00e71bda67 --- /dev/null +++ b/keras_cv/layers/regularization/dropblock_2d.py @@ -0,0 +1,241 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import tensorflow as tf
+from tensorflow.keras.__internal__.layers import BaseRandomLayer
+
+from keras_cv.utils import conv_utils
+
+
+class DropBlock2D(BaseRandomLayer):
+    """Applies DropBlock regularization to input features.
+
+    DropBlock is a form of structured dropout, where units in a contiguous
+    region of a feature map are dropped together. DropBlock works better than
+    dropout on convolutional layers because activation units in
+    convolutional layers are spatially correlated.
+
+    It is advised to use DropBlock after activation in a Conv -> BatchNorm ->
+    Activation block in later layers of the network. For example, the paper
+    mentions using DropBlock in the 3rd and 4th groups of ResNet blocks.
+
+    Reference:
+    - [DropBlock: A regularization method for convolutional networks](
+        https://arxiv.org/abs/1810.12890
+    )
+
+    Args:
+        dropout_rate: float. Probability of dropping a unit. Must be between 0 and 1.
+            For best results, the value should be between 0.05 and 0.25.
+        dropblock_size: integer, or tuple of integers. The size of the block to be
+            dropped. If an integer is passed, a square block will be dropped. If a
+            tuple is passed, the values are interpreted as the block's (height, width).
+            Must be greater than 0, and should not exceed the input feature map
+            size. The paper authors use `dropblock_size=7` for input features of size
+            `14x14xchannels`.
+            If this value is greater than or equal to the input feature map size, you
+            will encounter `nan` values.
+        data_format: string. One of channels_last (default) or channels_first. The
+            ordering of the dimensions in the inputs. channels_last corresponds to
+            inputs with shape (batch_size, height, width, channels) while
+            channels_first corresponds to inputs with shape
+            (batch_size, channels, height, width). It defaults to the
+            image_data_format value found in your Keras config file at
+            ~/.keras/keras.json. If you never set it, then it will be channels_last.
+        seed: integer. Used as the random seed.
+        name: string. The name of the layer.
+
+    Usage:
+    DropBlock2D can be used inside a `tf.keras.Model`:
+    ```python
+    # (...)
+    x = Conv2D(32, (1, 1))(x)
+    x = BatchNormalization()(x)
+    x = ReLU()(x)
+    x = DropBlock2D(0.1, dropblock_size=7)(x)
+    # (...)
+    ```
+    When used directly, the layer will zero out some inputs in a contiguous region and
+    normalize the remaining values.
+
+    ```python
+    # Small feature map shape for demonstration purposes:
+    features = tf.random.stateless_uniform((1, 4, 4, 1), seed=[0, 1])
+
+    # Preview the feature map
+    print(features[..., 0])
+    # tf.Tensor(
+    # [[[0.08216608 0.40928006 0.39318466 0.3162533 ]
+    #   [0.34717774 0.73199546 0.56369007 0.9769211 ]
+    #   [0.55243933 0.13101244 0.2941643  0.5130266 ]
+    #   [0.38977218 0.80855536 0.6040567  0.10502195]]], shape=(1, 4, 4),
+    # dtype=float32)
+
+    layer = DropBlock2D(0.1, dropblock_size=2, seed=1234)  # Small size for demonstration
+    output = layer(features, training=True)
+
+    # Preview the feature map after dropblock:
+    print(output[..., 0])
+    # tf.Tensor(
+    # [[[0.10955477 0.54570675 0.5242462  0.42167106]
+    #   [0.46290365 0.97599393 0.         0.        ]
+    #   [0.7365858  0.17468326 0.         0.        ]
+    #   [0.51969624 1.0780739  0.80540895 0.14002927]]], shape=(1, 4, 4),
+    # dtype=float32)
+
+    # We can observe two things:
+    # 1. A 2x2 block has been dropped
+    # 2. The inputs have been slightly scaled to account for missing values.
+
+    # The number of dropped blocks can vary between the channels: sometimes no
+    # blocks are dropped, and sometimes multiple blocks overlap.
+    # Let's demonstrate on a feature map with more channels:
+
+    features = tf.random.stateless_uniform((1, 4, 4, 36), seed=[0, 1])
+    layer = DropBlock2D(0.1, (2, 2), seed=123)
+    output = layer(features, training=True)
+
+    print(output[..., 0])  # no drop
+    # tf.Tensor(
+    # [[[0.09136613 0.98085546 0.15265216 0.19690938]
+    #   [0.48835075 0.52433217 0.1661478  0.7067729 ]
+    #   [0.07383626 0.9938906  0.14309917 0.06882786]
+    #   [0.43242374 0.04158871 0.24213943 0.1903095 ]]], shape=(1, 4, 4),
+    # dtype=float32)
+
+    print(output[..., 9])  # drop single block
+    # tf.Tensor(
+    # [[[0.14568178 0.01571623 0.9082305  1.0545396 ]
+    #   [0.24126057 0.86874676 0.         0.        ]
+    #   [0.44101703 0.29805306 0.         0.        ]
+    #   [0.56835717 0.04925899 0.6745584  0.20550345]]], shape=(1, 4, 4), dtype=float32)
+
+    print(output[..., 22])  # drop two blocks
+    # tf.Tensor(
+    # [[[0.69479376 0.49463132 1.0627024  0.58349967]
+    #   [0.         0.         0.36143216 0.58699244]
+    #   [0.         0.         0.         0.        ]
+    #   [0.0315055  1.0117861  0.         0.        ]]], shape=(1, 4, 4),
+    # dtype=float32)
+
+    print(output[..., 29])  # drop two blocks with overlap
+    # tf.Tensor(
+    # [[[0.2137237  0.9120104  0.9963533  0.33937347]
+    #   [0.21868704 0.44030213 0.5068906  0.20034194]
+    #   [0.         0.         0.         0.5915383 ]
+    #   [0.         0.         0.         0.9526224 ]]], shape=(1, 4, 4),
+    # dtype=float32)
+    ```
+    """
+
+    def __init__(
+        self,
+        dropout_rate,
+        dropblock_size,
+        data_format=None,
+        seed=None,
+        name=None,
+    ):
+        super().__init__(seed=seed, name=name, force_generator=True)
+        if not 0.0 <= dropout_rate <= 1.0:
+            raise ValueError(
+                f"dropout_rate must be a number between 0 and 1. "
+                f"Received: {dropout_rate}"
+            )
+
+        self._dropout_rate = dropout_rate
+        self._dropblock_height, self._dropblock_width = conv_utils.normalize_tuple(
+            value=dropblock_size, n=2, name="dropblock_size", allow_zero=False
+        )
+        self._data_format = conv_utils.normalize_data_format(data_format)
+
+    def call(self, x, training=None):
+        if not training or self._dropout_rate == 0.0:
+            return x
+
+        if self._data_format == "channels_last":
+            _, height, width, _ = tf.split(tf.shape(x), 4)
+        else:
+            _, _, height, width = tf.split(tf.shape(x), 4)
+
+        # Unnest scalar values
+        height = tf.squeeze(height)
+        width = tf.squeeze(width)
+
+        dropblock_height = tf.math.minimum(self._dropblock_height, height)
+        dropblock_width = tf.math.minimum(self._dropblock_width, width)
+
+        gamma = (
+            self._dropout_rate
+            * tf.cast(width * height, dtype=tf.float32)
+            / tf.cast(dropblock_height * dropblock_width, dtype=tf.float32)
+            / tf.cast(
+                (width - self._dropblock_width + 1)
+                * (height - self._dropblock_height + 1),
+                tf.float32,
+            )
+        )
+
+        # Forces the block to be inside the feature map.
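+        # Only positions where an entire block fits are eligible seeds; `gamma`
+        # above (Eq. (1) of the DropBlock paper) rescales the per-seed drop
+        # probability over this region so that the expected fraction of dropped
+        # units still matches `dropout_rate` after each seed is expanded into a
+        # dropblock_height x dropblock_width block.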
+        w_i, h_i = tf.meshgrid(tf.range(width), tf.range(height))
+        valid_block = tf.logical_and(
+            tf.logical_and(
+                w_i >= int(dropblock_width // 2),
+                w_i < width - (dropblock_width - 1) // 2,
+            ),
+            tf.logical_and(
+                h_i >= int(dropblock_height // 2),
+                h_i < height - (dropblock_height - 1) // 2,
+            ),
+        )
+
+        if self._data_format == "channels_last":
+            valid_block = tf.reshape(valid_block, [1, height, width, 1])
+        else:
+            valid_block = tf.reshape(valid_block, [1, 1, height, width])
+
+        random_noise = self._random_generator.random_uniform(
+            tf.shape(x), dtype=tf.float32
+        )
+        valid_block = tf.cast(valid_block, dtype=tf.float32)
+        seed_keep_rate = tf.cast(1 - gamma, dtype=tf.float32)
+        block_pattern = (1 - valid_block + seed_keep_rate + random_noise) >= 1
+        block_pattern = tf.cast(block_pattern, dtype=tf.float32)
+
+        if self._data_format == "channels_last":
+            window_size = [1, self._dropblock_height, self._dropblock_width, 1]
+        else:
+            window_size = [1, 1, self._dropblock_height, self._dropblock_width]
+
+        # Double negation turns max_pool into a min pooling over each window,
+        # expanding every zeroed seed into a full block of zeros.
+        block_pattern = -tf.nn.max_pool(
+            -block_pattern,
+            ksize=window_size,
+            strides=[1, 1, 1, 1],
+            padding="SAME",
+            data_format="NHWC" if self._data_format == "channels_last" else "NCHW",
+        )
+
+        # Slightly scale the values to account for the magnitude change
+        percent_ones = tf.cast(tf.reduce_sum(block_pattern), tf.float32) / tf.cast(
+            tf.size(block_pattern), tf.float32
+        )
+        return x / tf.cast(percent_ones, x.dtype) * tf.cast(block_pattern, x.dtype)
+
+    def get_config(self):
+        config = {
+            "dropout_rate": self._dropout_rate,
+            "dropblock_size": (self._dropblock_height, self._dropblock_width),
+            "data_format": self._data_format,
+        }
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
diff --git a/keras_cv/layers/regularization/dropblock_2d_test.py b/keras_cv/layers/regularization/dropblock_2d_test.py
new file mode 100644
index 0000000000..35b2f62aae
--- /dev/null
+++ b/keras_cv/layers/regularization/dropblock_2d_test.py
@@ -0,0 +1,94 @@
+# Copyright 2022 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
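+
+# A quick eager sanity check of the layer under test (an illustrative sketch,
+# not one of the unit tests below):
+#
+#   layer = DropBlock2D(dropout_rate=0.1, dropblock_size=7)
+#   out = layer(tf.ones((1, 14, 14, 256)), training=True)
+#   # roughly `dropout_rate` of the activations end up as zeros, and the
+#   # surviving values are scaled up to preserve the overall magnitude:
+#   tf.reduce_mean(tf.cast(out == 0, tf.float32))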
+import tensorflow as tf + +from keras_cv.layers.regularization.dropblock_2d import DropBlock2D + + +class DropBlock2DTest(tf.test.TestCase): + FEATURE_SHAPE = (1, 14, 14, 256) # Shape of ResNet block group 3 + rng = tf.random.Generator.from_non_deterministic_state() + + def test_layer_not_created_with_invalid_block_size(self): + invalid_sizes = [0, -10, (5, -2), (0, 7), (1, 2, 3, 4)] + for size in invalid_sizes: + with self.assertRaises(ValueError): + DropBlock2D(dropblock_size=size, dropout_rate=0.1) + + def test_layer_not_created_with_invalid_dropout_rate(self): + invalid_rates = [1.1, -0.1] + for rate in invalid_rates: + with self.assertRaises(ValueError): + DropBlock2D(dropout_rate=rate, dropblock_size=7) + + def test_input_unchanged_in_eval_mode(self): + dummy_inputs = self.rng.uniform(shape=self.FEATURE_SHAPE) + layer = DropBlock2D(dropout_rate=0.1, dropblock_size=7) + + output = layer(dummy_inputs, training=False) + + self.assertAllClose(dummy_inputs, output) + + def test_input_unchanged_with_dropout_rate_equal_to_zero(self): + dummy_inputs = self.rng.uniform(shape=self.FEATURE_SHAPE) + layer = DropBlock2D(dropout_rate=0.0, dropblock_size=7) + + output = layer(dummy_inputs, training=True) + + self.assertAllClose(dummy_inputs, output) + + def test_input_gets_partially_zeroed_out_in_train_mode(self): + dummy_inputs = self.rng.uniform(shape=self.FEATURE_SHAPE) + layer = DropBlock2D(dropout_rate=0.1, dropblock_size=7) + + output = layer(dummy_inputs, training=True) + num_input_zeros = self._count_zeros(dummy_inputs) + num_output_zeros = self._count_zeros(output) + + self.assertGreater(num_output_zeros, num_input_zeros) + + def test_batched_input_gets_partially_zeroed_out_in_train_mode(self): + batched_shape = (4, *self.FEATURE_SHAPE[1:]) + dummy_inputs = self.rng.uniform(shape=batched_shape) + layer = DropBlock2D(dropout_rate=0.1, dropblock_size=7) + + output = layer(dummy_inputs, training=True) + num_input_zeros = self._count_zeros(dummy_inputs) + num_output_zeros = self._count_zeros(output) + + self.assertGreater(num_output_zeros, num_input_zeros) + + def test_input_gets_partially_zeroed_out_with_non_square_dropblock_size(self): + dummy_inputs = self.rng.uniform(shape=self.FEATURE_SHAPE) + layer = DropBlock2D(dropout_rate=0.1, dropblock_size=(7, 10)) + + output = layer(dummy_inputs, training=True) + num_input_zeros = self._count_zeros(dummy_inputs) + num_output_zeros = self._count_zeros(output) + + self.assertGreater(num_output_zeros, num_input_zeros) + + @staticmethod + def _count_zeros(tensor: tf.Tensor) -> tf.Tensor: + return tf.size(tensor) - tf.math.count_nonzero(tensor, dtype=tf.int32) + + def test_works_with_xla(self): + dummy_inputs = self.rng.uniform(shape=self.FEATURE_SHAPE) + layer = DropBlock2D(dropout_rate=0.1, dropblock_size=7) + + @tf.function(jit_compile=True) + def apply(x): + return layer(x, training=True) + + apply(dummy_inputs) diff --git a/keras_cv/metrics/coco/mean_average_precision.py b/keras_cv/metrics/coco/mean_average_precision.py index 17f6689ec0..c02e5ed62d 100644 --- a/keras_cv/metrics/coco/mean_average_precision.py +++ b/keras_cv/metrics/coco/mean_average_precision.py @@ -11,6 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import warnings
+
 import tensorflow as tf
 
 from keras_cv.metrics.coco import utils
@@ -135,13 +137,16 @@ def reset_state(self):
 
     @tf.function()
     def update_state(self, y_true, y_pred, sample_weight=None):
-        num_images = tf.shape(y_true)[0]
-
-        if sample_weight is not None:
-            raise ValueError(
-                "COCOMeanAveragePrecision does not support `sample_weight`"
+        if sample_weight is not None:
+            warnings.warn(
+                "sample_weight is not yet supported in keras_cv COCO metrics."
             )
 
+        class_ids = tf.constant(self.class_ids, dtype=self.compute_dtype)
+        iou_thresholds = tf.constant(self.iou_thresholds, dtype=self.compute_dtype)
+
+        num_images = tf.shape(y_true)[0]
+
         y_pred = utils.sort_bounding_boxes(y_pred, axis=bounding_box.CONFIDENCE)
 
         ground_truth_boxes_update = tf.zeros_like(self.ground_truths)
@@ -160,7 +165,6 @@ def update_state(self, y_true, y_pred, sample_weight=None):
             detections, self.area_range[0], self.area_range[1]
         )
 
-        detections = detections
         if self.max_detections < tf.shape(detections)[0]:
             detections = detections[: self.max_detections]
 
@@ -172,8 +176,8 @@ def update_state(self, y_true, y_pred, sample_weight=None):
         )
 
         ground_truths_update = tf.TensorArray(tf.int32, size=self.num_class_ids)
-        for c_i in range(self.num_class_ids):
-            category_id = self.class_ids[c_i]
+        for c_i in tf.range(self.num_class_ids):
+            category_id = class_ids[c_i]
             ground_truths = utils.filter_boxes(
                 ground_truths, value=category_id, axis=bounding_box.CLASS
             )
@@ -190,8 +194,8 @@ def update_state(self, y_true, y_pred, sample_weight=None):
 
             ious = iou_lib.compute_ious_for_image(ground_truths, detections)
 
-            for iou_i in range(self.num_iou_thresholds):
-                iou_threshold = self.iou_thresholds[iou_i]
+            for iou_i in tf.range(self.num_iou_thresholds):
+                iou_threshold = iou_thresholds[iou_i]
                 pred_matches = utils.match_boxes(ious, iou_threshold)
 
                 dt_scores = detections[:, bounding_box.CONFIDENCE]
@@ -277,8 +281,8 @@ def result(self):
         )
         zero_pad = tf.zeros(shape=(1,), dtype=tf.float32)
         # so in this case this should be: [1, 1]
-        for i in range(self.num_class_ids):
-            for j in range(self.num_iou_thresholds):
+        for i in tf.range(self.num_class_ids):
+            for j in tf.range(self.num_iou_thresholds):
                 recalls_i = recalls[i, j]
                 precisions_i = precisions[i, j]
diff --git a/keras_cv/metrics/coco/recall.py b/keras_cv/metrics/coco/recall.py
index d89b5ebb43..43bcb5fe2b 100644
--- a/keras_cv/metrics/coco/recall.py
+++ b/keras_cv/metrics/coco/recall.py
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import warnings
+
 import tensorflow as tf
 import tensorflow.keras as keras
 import tensorflow.keras.initializers as initializers
@@ -117,8 +119,8 @@ def update_state(self, y_true, y_pred, sample_weight=None):
             y_pred: a bounding box Tensor in corners format.
             sample_weight: Currently unsupported.
         """
-        if sample_weight:
-            raise NotImplementedError(
+        if sample_weight is not None:
+            warnings.warn(
                 "sample_weight is not yet supported in keras_cv COCO metrics."
             )
 
diff --git a/keras_cv/utils/conv_utils.py b/keras_cv/utils/conv_utils.py
new file mode 100644
index 0000000000..5b0789af0d
--- /dev/null
+++ b/keras_cv/utils/conv_utils.py
@@ -0,0 +1,82 @@
+# Copyright 2022 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import tensorflow as tf
+
+
+def normalize_data_format(value):
+    if value is None:
+        value = tf.keras.backend.image_data_format()
+    data_format = value.lower()
+    if data_format not in {"channels_first", "channels_last"}:
+        raise ValueError(
+            "The `data_format` argument must be one of "
+            f'"channels_first", "channels_last". Received: {value}'
+        )
+    return data_format
+
+
+def normalize_tuple(value, n, name, allow_zero=False):
+    """Transforms a non-negative/positive integer or iterable of integers into
+    an integer tuple.
+
+    Args:
+        value: The value to validate and convert. Could be an int, or any
+            iterable of ints.
+        n: The size of the tuple to be returned.
+        name: The name of the argument being validated, e.g. "strides" or
+            "kernel_size". This is only used to format error messages.
+        allow_zero: Defaults to False. A ValueError will be raised if zero is
+            received and this param is False.
+    Returns:
+        A tuple of n integers.
+    Raises:
+        ValueError: If something other than an int or an iterable of ints is
+            passed, or if a negative value is received when not allowed.
+    """
+    error_msg = (
+        f"The `{name}` argument must be a tuple of {n} " f"integers. Received: {value}"
+    )
+
+    if isinstance(value, int):
+        value_tuple = (value,) * n
+    else:
+        try:
+            value_tuple = tuple(value)
+        except TypeError:
+            raise ValueError(error_msg)
+        if len(value_tuple) != n:
+            raise ValueError(error_msg)
+        for single_value in value_tuple:
+            try:
+                int(single_value)
+            except (ValueError, TypeError):
+                error_msg += (
+                    f"including element {single_value} of " f"type {type(single_value)}"
+                )
+                raise ValueError(error_msg)
+
+    if allow_zero:
+        unqualified_values = {v for v in value_tuple if v < 0}
+        req_msg = ">= 0"
+    else:
+        unqualified_values = {v for v in value_tuple if v <= 0}
+        req_msg = "> 0"
+
+    if unqualified_values:
+        error_msg += (
+            f" including {unqualified_values}"
+            f" that do not satisfy the requirement `{req_msg}`."
+        )
+        raise ValueError(error_msg)
+
+    return value_tuple
diff --git a/keras_cv/utils/preprocessing.py b/keras_cv/utils/preprocessing.py
new file mode 100644
index 0000000000..3d9b35ee18
--- /dev/null
+++ b/keras_cv/utils/preprocessing.py
@@ -0,0 +1,203 @@
+# Copyright 2022 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import tensorflow as tf
+from tensorflow.keras import backend
+
+
+def transform_value_range(images, original_range, target_range, dtype=tf.float32):
+    """Transforms values in an input tensor from `original_range` to `target_range`.
+
+    This function is intended to be used in preprocessing layers that rely upon
+    color values. This allows us to assume internally that the input tensor is
+    always in the range [0, 255].
+
+    Args:
+        images: the set of images to transform to the target range.
+        original_range: the value range to transform from.
+        target_range: the value range to transform to.
+        dtype: the dtype to compute the conversion with. Defaults to tf.float32.
+
+    Returns:
+        a new Tensor with values in the target range.
+
+    Usage:
+    ```python
+    original_range = [0, 1]
+    target_range = [0, 255]
+    images = keras_cv.utils.preprocessing.transform_value_range(
+        images,
+        original_range,
+        target_range
+    )
+    images = tf.math.minimum(images + 10, 255)
+    images = keras_cv.utils.preprocessing.transform_value_range(
+        images,
+        target_range,
+        original_range
+    )
+    ```
+    """
+    if original_range[0] == target_range[0] and original_range[1] == target_range[1]:
+        return images
+
+    images = tf.cast(images, dtype=dtype)
+    original_min_value, original_max_value = _unwrap_value_range(
+        original_range, dtype=dtype
+    )
+    target_min_value, target_max_value = _unwrap_value_range(target_range, dtype=dtype)
+
+    # images in the [0, 1] scale
+    images = (images - original_min_value) / (original_max_value - original_min_value)
+
+    scale_factor = target_max_value - target_min_value
+    return (images * scale_factor) + target_min_value
+
+
+def _unwrap_value_range(value_range, dtype=tf.float32):
+    min_value, max_value = value_range
+    min_value = tf.cast(min_value, dtype=dtype)
+    max_value = tf.cast(max_value, dtype=dtype)
+    return min_value, max_value
+
+
+def blend(image1: tf.Tensor, image2: tf.Tensor, factor: float) -> tf.Tensor:
+    """Blend image1 and image2 using `factor`.
+
+    Factor should be in the range [0, 1]. A value of 0.0 means only image1
+    is used. A value of 1.0 means only image2 is used. A value between 0.0
+    and 1.0 means we linearly interpolate the pixel values between the two
+    images. A value greater than 1.0 "extrapolates" the difference
+    between the two pixel values, and we clip the results to values
+    between 0 and 255.
+
+    Args:
+        image1: An image Tensor of type tf.float32 with value range [0, 255].
+        image2: An image Tensor of type tf.float32 with value range [0, 255].
+        factor: A floating point value above 0.0.
+    Returns:
+        A blended image Tensor.
+    """
+    difference = image2 - image1
+    scaled = factor * difference
+    temp = image1 + scaled
+    return tf.clip_by_value(temp, 0.0, 255.0)
+
+
+def parse_factor_value_range(param, min_value=0.0, max_value=1.0, param_name="factor"):
+    if isinstance(param, float):
+        param = (min_value, param)
+
+    if param[0] > param[1]:
+        raise ValueError(
+            f"`{param_name}[0] > {param_name}[1]`, `{param_name}[0]` must be <= "
+            f"`{param_name}[1]`. Got `{param_name}={param}`"
+        )
+    if param[0] < min_value or param[1] > max_value:
+        raise ValueError(
+            f"`{param_name}` should be inside of range [{min_value}, {max_value}]. "
+            f"Got {param_name}={param}"
+        )
+
+    return param
+
+
+def transform(
+    images,
+    transforms,
+    fill_mode="reflect",
+    fill_value=0.0,
+    interpolation="bilinear",
+    output_shape=None,
+    name=None,
+):
+    """Applies the given transform(s) to the image(s).
+
+    Args:
+        images: A tensor of shape
+            `(num_images, num_rows, num_columns, num_channels)` (NHWC). The rank must
+            be statically known (the shape is not `TensorShape(None)`).
+        transforms: Projective transform matrix/matrices. A vector of length 8 or
+            tensor of size N x 8. 
If one row of transforms is [a0, a1, a2, b0, b1, b2, + c0, c1], then it maps the *output* point `(x, y)` to a transformed *input* + point `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, where + `k = c0 x + c1 y + 1`. The transforms are *inverted* compared to the + transform mapping input points to output points. Note that gradients are + not backpropagated into transformation parameters. + fill_mode: Points outside the boundaries of the input are filled according + to the given mode (one of `{"constant", "reflect", "wrap", "nearest"}`). + fill_value: a float represents the value to be filled outside the boundaries + when `fill_mode="constant"`. + interpolation: Interpolation mode. Supported values: `"nearest"`, + `"bilinear"`. + output_shape: Output dimension after the transform, `[height, width]`. + If `None`, output is the same size as input image. + name: The name of the op. + + Fill mode behavior for each valid value is as follows: + + - reflect (d c b a | a b c d | d c b a) + The input is extended by reflecting about the edge of the last pixel. + + - constant (k k k k | a b c d | k k k k) + The input is extended by filling all + values beyond the edge with the same constant value k = 0. + + - wrap (a b c d | a b c d | a b c d) + The input is extended by wrapping around to the opposite edge. + + - nearest (a a a a | a b c d | d d d d) + The input is extended by the nearest pixel. + + Input shape: + 4D tensor with shape: `(samples, height, width, channels)`, + in `"channels_last"` format. + + Output shape: + 4D tensor with shape: `(samples, height, width, channels)`, + in `"channels_last"` format. + + Returns: + Image(s) with the same type and shape as `images`, with the given + transform(s) applied. Transformed coordinates outside of the input image + will be filled with zeros. + + Raises: + TypeError: If `image` is an invalid type. + ValueError: If output shape is not 1-D int32 Tensor. + """ + with backend.name_scope(name or "transform"): + if output_shape is None: + output_shape = tf.shape(images)[1:3] + if not tf.executing_eagerly(): + output_shape_value = tf.get_static_value(output_shape) + if output_shape_value is not None: + output_shape = output_shape_value + + output_shape = tf.convert_to_tensor(output_shape, tf.int32, name="output_shape") + + if not output_shape.get_shape().is_compatible_with([2]): + raise ValueError( + "output_shape must be a 1-D Tensor of 2 elements: " + "new_height, new_width, instead got " + "{}".format(output_shape) + ) + + fill_value = tf.convert_to_tensor(fill_value, tf.float32, name="fill_value") + + return tf.raw_ops.ImageProjectiveTransformV3( + images=images, + output_shape=output_shape, + fill_value=fill_value, + transforms=transforms, + fill_mode=fill_mode.upper(), + interpolation=interpolation.upper(), + ) diff --git a/keras_cv/utils/preprocessing_test.py b/keras_cv/utils/preprocessing_test.py new file mode 100644 index 0000000000..df1e4255af --- /dev/null +++ b/keras_cv/utils/preprocessing_test.py @@ -0,0 +1,50 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import tensorflow as tf + +from keras_cv.utils import preprocessing + + +class PreprocessingTestCase(tf.test.TestCase): + def setUp(self): + super().setUp() + + def test_transform_to_standard_range_neg_one_range(self): + x = tf.constant([-1, 0, 1]) + x = preprocessing.transform_value_range( + x, original_range=[-1, 1], target_range=[0, 255] + ) + self.assertAllClose(x, [0.0, 127.5, 255.0]) + + def test_transform_to_same_range(self): + x = tf.constant([-1, 0, 1]) + x = preprocessing.transform_value_range( + x, original_range=[0, 255], target_range=[0, 255] + ) + self.assertAllClose(x, [-1, 0, 1]) + + def test_transform_to_standard_range(self): + x = tf.constant([8 / 255, 9 / 255, 255 / 255]) + x = preprocessing.transform_value_range( + x, original_range=[0, 1], target_range=[0, 255] + ) + self.assertAllClose(x, [8.0, 9.0, 255.0]) + + def test_transform_to_value_range(self): + x = tf.constant([128.0, 255.0, 0.0]) + x = preprocessing.transform_value_range( + x, original_range=[0, 255], target_range=[0, 1] + ) + self.assertAllClose(x, [128 / 255, 1, 0]) diff --git a/setup.cfg b/setup.cfg index a5f79cbe47..d14b3c4933 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,5 @@ +[metadata] +version = attr: keras_cv.__version__ [tool:pytest] filterwarnings = @@ -32,4 +34,4 @@ ignore = [isort] known_first_party = keras_cv,tests default_section = THIRDPARTY -line_length = 88 \ No newline at end of file +line_length = 88 diff --git a/setup.py b/setup.py index f2a7d59d85..d97bc585d6 100644 --- a/setup.py +++ b/setup.py @@ -14,17 +14,25 @@ """Setup script.""" +import pathlib + from setuptools import find_packages from setuptools import setup +HERE = pathlib.Path(__file__).parent +README = (HERE / "README.md").read_text() + setup( name="keras-cv", description="Industry-strength computer Vision extensions for Keras.", + long_description=README, + long_description_content_type="text/markdown", url="https://github.com/keras-team/keras-cv", author="Keras team", author_email="keras-cv@google.com", license="Apache License 2.0", - install_requires=["packaging", "tensorflow", "absl-py"], + # Temporarily require tf-nightly until tf 2.9 + install_requires=["packaging", "tf-nightly", "absl-py"], extras_require={ "tests": ["flake8", "isort", "black", "pytest"], "examples": ["tensorflow_datasets", "matplotlib"], diff --git a/shell/clean.sh b/shell/clean.sh index 004ba33990..fb29df9f08 100755 --- a/shell/clean.sh +++ b/shell/clean.sh @@ -2,4 +2,4 @@ rm -rf keras_cv.egg-info/ rm -rf keras_cv/**/__pycache__ rm -rf keras_cv/__pycache__ - +rm -rf build/ diff --git a/shell/format.sh b/shell/format.sh index 09d71804fa..aff6e55241 100755 --- a/shell/format.sh +++ b/shell/format.sh @@ -1,4 +1,3 @@ #!/bin/bash -isort --sl . +isort --sl --profile=black . black . - diff --git a/shell/lint.sh b/shell/lint.sh index dc0cd79b15..b5d762360a 100755 --- a/shell/lint.sh +++ b/shell/lint.sh @@ -1,5 +1,5 @@ #!/bin/bash -isort --sl -c . +isort --sl -c --profile=black . if ! [ $? -eq 0 ] then echo "Please run \"sh shell/format.sh\" to format the code." @@ -20,7 +20,7 @@ then exit 1 fi echo "no issues with black" -for i in $(find keras_cv -name '*.py') # or whatever other pattern... +for i in $(find examples keras_cv -name '*.py') # or whatever other pattern... do if ! 
grep -q Copyright $i then From 1489531c545a2aa6742cecbb94a2719cf2c9addc Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Sat, 26 Mar 2022 16:24:55 +0100 Subject: [PATCH 38/43] WIP. GridMask to BaseImageAugmentationLayer --- keras_cv/layers/preprocessing/grid_mask.py | 194 +++++++-------------- 1 file changed, 66 insertions(+), 128 deletions(-) diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index 6e9a81fad9..01756af410 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -17,9 +17,21 @@ from tensorflow.keras import layers from keras_cv.utils import fill_utils +from keras.layers.preprocessing.image_preprocessing import BaseImageAugmentationLayer -class GridMask(layers.Layer): +def _center_crop(mask, width, height): + masks_shape = tf.shape(mask) + h_diff = masks_shape[0] - height + w_diff = masks_shape[1] - width + + h_start = tf.cast(h_diff / 2, tf.int32) + w_start = tf.cast(w_diff / 2, tf.int32) + return tf.image.crop_to_bounding_box(mask, h_start, w_start, height, width) + + +# class GridMask(tf.keras.__internal__.layers.BaseImageAugmentationLayer): +class GridMask(BaseImageAugmentationLayer): """GridMask class for grid-mask augmentation. @@ -90,6 +102,7 @@ def __init__( factor=rotation_factor, fill_mode="constant", fill_value=0.0, seed=seed ) self.seed = seed + self.auto_vectorize = False self._check_parameter_values() @@ -119,135 +132,84 @@ def _check_parameter_values(self): f'"gaussian_noise", or "random". Got `fill_mode`={fill_mode}' ) - def _compute_grid_masks(self, input_shape): - """Computes grid masks""" - batch_size = input_shape[0] - height = tf.cast(input_shape[1], tf.float32) - width = tf.cast(input_shape[2], tf.float32) + def get_random_transformation(self, image=None, label=None, bounding_box=None): + if self.ratio == "random": + return tf.random.uniform( + shape=(), minval=0, maxval=1, dtype=tf.float32, seed=self.seed + ) + else: + return self.ratio + + def _compute_grid_mask(self, input_shape, ratio): + height = tf.cast(input_shape[0], tf.float32) + width = tf.cast(input_shape[1], tf.float32) # masks side length input_diagonal_len = tf.sqrt(tf.square(width) + tf.square(height)) mask_side_len = tf.math.ceil(input_diagonal_len) - # grid unit sizes - unit_sizes = tf.random.uniform( - shape=[batch_size], + # grid unit size + unit_size = tf.random.uniform( + shape=(), minval=tf.math.minimum(height * 0.5, width * 0.3), maxval=tf.math.maximum(height * 0.5, width * 0.3) + 1, dtype=tf.float32, ) - if self.ratio == "random": - ratio = tf.random.uniform( - shape=[batch_size], minval=0, maxval=1, dtype=tf.float32, seed=self.seed - ) - else: - ratio = self.ratio - rectangle_side_len = tf.cast((1 - ratio) * unit_sizes, tf.float32) + rectangle_side_len = tf.cast((1 - ratio) * unit_size, tf.float32) - # sample x and y offsets for grid units randomly between 0 and unit_sizes - delta_x = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) - delta_y = tf.random.uniform([batch_size], minval=0, maxval=1, dtype=tf.float32) - delta_x = delta_x * unit_sizes - delta_y = delta_y * unit_sizes + # sample x and y offset for grid units randomly between 0 and unit_size + delta_x = tf.random.uniform(shape=(), minval=0, maxval=unit_size, dtype=tf.float32) + delta_y = tf.random.uniform(shape=(), minval=0, maxval=unit_size, dtype=tf.float32) - # grid size (number of diagonal units per grid) - grid_sizes = mask_side_len // unit_sizes + 1 - max_grid_size = 
tf.reduce_max(grid_sizes) - - # grid size range per image - grid_size_range = tf.range(1, max_grid_size + 1) - grid_size_range = tf.tile(tf.expand_dims(grid_size_range, 0), [batch_size, 1]) - - # make broadcastable to grid size ranges - delta_x = tf.expand_dims(delta_x, 1) - delta_y = tf.expand_dims(delta_y, 1) - unit_sizes = tf.expand_dims(unit_sizes, 1) - rectangle_side_len = tf.expand_dims(rectangle_side_len, 1) + # grid size (number of diagonal units in grid) + grid_size = mask_side_len // unit_size + 1 + grid_size_range = tf.range(1, grid_size + 1) # diagonal corner coordinates - d_range = grid_size_range * unit_sizes - x1 = d_range - delta_x + unit_size_range = grid_size_range * unit_size + x1 = unit_size_range - delta_x x0 = x1 - rectangle_side_len - y1 = d_range - delta_y + y1 = unit_size_range - delta_y y0 = y1 - rectangle_side_len - # not every input has the same grid size (its random), - # so we mask out some of the coordinates for smaller grids. - d_range_mask = tf.sequence_mask( - lengths=grid_sizes, maxlen=max_grid_size, dtype=tf.float32 - ) - x1 = x1 * d_range_mask - x0 = x0 * d_range_mask - y1 = y1 * d_range_mask - y0 = y0 * d_range_mask - - # mesh grid of diagonal top left corner coordinates for each image - x0 = tf.tile(tf.expand_dims(x0, 1), [1, max_grid_size, 1]) - y0 = tf.tile(tf.expand_dims(y0, 1), [1, max_grid_size, 1]) - y0 = tf.transpose(y0, [0, 2, 1]) - - # mesh grid of diagonal bottom right corner coordinates for each image - x1 = tf.tile(tf.expand_dims(x1, 1), [1, max_grid_size, 1]) - y1 = tf.tile(tf.expand_dims(y1, 1), [1, max_grid_size, 1]) - y1 = tf.transpose(y1, [0, 2, 1]) - - # flatten mesh grids - x0 = tf.reshape(x0, [-1, max_grid_size]) - y0 = tf.reshape(y0, [-1, max_grid_size]) - x1 = tf.reshape(x1, [-1, max_grid_size]) - y1 = tf.reshape(y1, [-1, max_grid_size]) - - # combine coordinates to (x0, y0, x1, y1) - # with shape (num_rectangles_in_batch, 4) - corners = tf.stack([x0, y0, x1, y1], axis=-1) - corners = tf.reshape(corners, [-1, 4]) - - # make mask for each rectangle - mask_side_len = tf.cast(mask_side_len, tf.int32) - masks = fill_utils.corners_to_mask(corners, (mask_side_len, mask_side_len)) - - # reshape masks into shape - # (batch_size, rectangles_per_image, mask_height, mask_width) - mask_side_len = tf.cast(mask_side_len, tf.float32) - masks = tf.reshape( - masks, - [-1, max_grid_size * max_grid_size, mask_side_len, mask_side_len], - ) - - # combine rectangle masks per image - masks = tf.reduce_any(masks, axis=1) + # compute grid coordinates + x0, y0 = tf.meshgrid(x0, y0) + x1, y1 = tf.meshgrid(x1, y1) - return masks + # flatten mesh grid + x0 = tf.reshape(x0, [-1]) + y0 = tf.reshape(y0, [-1]) + x1 = tf.reshape(x1, [-1]) + y1 = tf.reshape(y1, [-1]) - def _center_crop(self, masks, width, height): - masks_shape = tf.shape(masks) - h_diff = masks_shape[1] - height - w_diff = masks_shape[2] - width + corners = tf.stack([x0, y0, x1, y1], axis=-1) - h_start = tf.cast(h_diff / 2, tf.int32) - w_start = tf.cast(w_diff / 2, tf.int32) - return tf.image.crop_to_bounding_box(masks, h_start, w_start, height, width) + mask_side_len = tf.cast(mask_side_len, tf.int32) + mask = tf.random.uniform(shape=(mask_side_len, mask_side_len), minval=0, maxval=2, dtype=tf.int32) + mask = tf.cast(mask, tf.bool) + return mask - def _grid_mask(self, images): - input_shape = tf.shape(images) + def augment_image(self, image, transformation=None): + ratio = transformation + input_shape = tf.shape(image) # compute grid masks - masks = self._compute_grid_masks(input_shape) + mask 
= self._compute_grid_mask(input_shape, ratio) - # convert masks to single-channel images - masks = tf.cast(masks, tf.float32) - masks = tf.expand_dims(masks, axis=-1) + # convert mask to single-channel images + mask = tf.cast(mask, tf.float32) + mask = tf.expand_dims(mask, axis=-1) - # randomly rotate masks - masks = self.random_rotate(masks) + # randomly rotate mask + mask = self.random_rotate(mask) - # center crop masks - input_height = input_shape[1] - input_width = input_shape[2] - masks = self._center_crop(masks, input_width, input_height) + # center crop mask + input_height = input_shape[0] + input_width = input_shape[1] + mask = _center_crop(mask, input_width, input_height) # convert back to boolean mask - masks = tf.cast(masks, tf.bool) + mask = tf.cast(mask, tf.bool) # fill if self.fill_mode == "constant": @@ -256,31 +218,7 @@ def _grid_mask(self, images): # gaussian noise fill_value = tf.random.normal(input_shape) - return tf.where(masks, fill_value, images) - - def call(self, images, training=True): - """call method for the GridMask layer. - - Args: - images: Tensor representing images with shape - [batch_size, width, height, channels] or [width, height, channels] - of type int or float. Values should be in the range [0, 255]. - Returns: - images: augmented images, same shape as input. - """ - - if training is None: - training = backend.learning_phase() - - if training: - if images.shape.rank == 3: - images = tf.expand_dims(images, axis=0) - images = self._grid_mask(images) - images = tf.squeeze(images, axis=0) - else: - images = self._grid_mask(images) - - return images + return tf.where(mask, fill_value, image) def get_config(self): config = { From c2900cfd7554a7c56de7f76e331e025b587290b0 Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Sat, 26 Mar 2022 17:08:15 +0100 Subject: [PATCH 39/43] WIP. GridMask to BaseImageAugmentationLayer --- keras_cv/layers/preprocessing/grid_mask.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index 01756af410..7463dc1b1a 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -13,11 +13,9 @@ # limitations under the License. import tensorflow as tf -from tensorflow.keras import backend from tensorflow.keras import layers from keras_cv.utils import fill_utils -from keras.layers.preprocessing.image_preprocessing import BaseImageAugmentationLayer def _center_crop(mask, width, height): @@ -30,8 +28,7 @@ def _center_crop(mask, width, height): return tf.image.crop_to_bounding_box(mask, h_start, w_start, height, width) -# class GridMask(tf.keras.__internal__.layers.BaseImageAugmentationLayer): -class GridMask(BaseImageAugmentationLayer): +class GridMask(tf.keras.__internal__.layers.BaseImageAugmentationLayer): """GridMask class for grid-mask augmentation. 

@@ -102,7 +99,7 @@ def __init__(
             factor=rotation_factor, fill_mode="constant", fill_value=0.0, seed=seed
         )
         self.seed = seed
-        self.auto_vectorize = False
+        self.auto_vectorize = True
 
         self._check_parameter_values()
 
@@ -183,11 +180,11 @@ def _compute_grid_mask(self, input_shape, ratio):
         y1 = tf.reshape(y1, [-1])
 
         corners = tf.stack([x0, y0, x1, y1], axis=-1)
-        mask_side_len = tf.cast(mask_side_len, tf.int32)
-        mask = tf.random.uniform(shape=(mask_side_len, mask_side_len), minval=0, maxval=2, dtype=tf.int32)
-        mask = tf.cast(mask, tf.bool)
-        return mask
+        rectangle_masks = fill_utils.corners_to_mask(corners, mask_shape=(mask_side_len, mask_side_len))
+        grid_mask = tf.reduce_any(rectangle_masks, axis=0)
+
+        return grid_mask
 
From 19794f17e7ec9749e6dfff0bc11a15ef115031f0 Mon Sep 17 00:00:00 2001
From: Christoffer Hjort
Date: Wed, 30 Mar 2022 19:36:16 +0200
Subject: [PATCH 40/43] merge master

---
 .github/API_DESIGN.md                         |  15 +-
 .../layers/preprocessing/random_hue_demo.py   |  54 +++++++
 .../preprocessing/random_saturation_demo.py   |  55 ++++++++
 keras_cv/layers/preprocessing/__init__.py     |   2 +
 .../layers/preprocessing/auto_contrast.py     |   6 +
 .../layers/preprocessing/channel_shuffle.py   |   1 +
 keras_cv/layers/preprocessing/cut_mix.py      |   1 +
 keras_cv/layers/preprocessing/equalization.py |  41 ++++--
 keras_cv/layers/preprocessing/grayscale.py    |   1 +
 keras_cv/layers/preprocessing/grid_mask.py    |   3 +-
 keras_cv/layers/preprocessing/mix_up.py       |   1 +
 .../layers/preprocessing/posterization.py     |   3 +-
 .../random_color_degeneration.py              |   8 +-
 .../layers/preprocessing/random_cutout.py     | 132 +++++------------
 .../preprocessing/random_cutout_test.py       |   8 ++
 keras_cv/layers/preprocessing/random_hue.py   |  67 +++++++++
 .../layers/preprocessing/random_hue_test.py   | 108 ++++++++++++++
 .../layers/preprocessing/random_saturation.py |  77 ++++++++++
 .../preprocessing/random_saturation_test.py   |  96 +++++++++++++
 .../layers/preprocessing/random_sharpness.py  |   8 +-
 keras_cv/layers/preprocessing/random_shear.py |  16 ++-
 .../preprocessing/serialization_test.py       |  52 +++++++
 keras_cv/layers/preprocessing/solarization.py |  10 +-
 23 files changed, 638 insertions(+), 127 deletions(-)
 create mode 100644 examples/layers/preprocessing/random_hue_demo.py
 create mode 100644 examples/layers/preprocessing/random_saturation_demo.py
 create mode 100644 keras_cv/layers/preprocessing/random_hue.py
 create mode 100644 keras_cv/layers/preprocessing/random_hue_test.py
 create mode 100644 keras_cv/layers/preprocessing/random_saturation.py
 create mode 100644 keras_cv/layers/preprocessing/random_saturation_test.py
 create mode 100644 keras_cv/layers/preprocessing/serialization_test.py

diff --git a/.github/API_DESIGN.md b/.github/API_DESIGN.md
index 190c4af064..c5c708ea50 100644
--- a/.github/API_DESIGN.md
+++ b/.github/API_DESIGN.md
@@ -14,6 +14,18 @@ loss fond of `segm`.
 In order to ensure full consistency, we have decided to use the full names for
 label types in our code base.
 
 # Preprocessing Layers
+## Strength Parameters
+Many augmentation layers take a parameter representing a strength, often called
+`factor`. When possible, factor values must conform to the range `[0, 1]`, with
+1 representing the strongest transformation and 0 representing a no-op transform.
+The strength of an augmentation should scale linearly with this factor. If needed,
+a transformation can be performed to map to a larger value range internally. If
+this is done, please provide a thorough explanation of the value range semantics in
+the docstring.
+
+Additionally, factors should support both floats and tuples as inputs. If a float is
+passed, such as `factor=0.5`, the layer should default to the range `[0, factor]`.
+
 ## BaseImageAugmentationLayer
 When implementing preprocessing, we encourage users to subclass the
 `tf.keras.__internal__.layers.BaseImageAugmentationLayer`. This layer provides
@@ -25,8 +37,7 @@ When subclassing `BaseImageAugmentationLayer`, several methods can overridden:
 - `augment_label()` allows updates to be made to labels
 - `augment_bounding_box()` allows updates to bounding boxes to be made
 
-When a canonical layer subclassing BaseImageAugmentationLayer is available, a
-link to it will be added here.
+[`RandomShear` serves as a canonical example of how to subclass `BaseImageAugmentationLayer`](https://github.com/keras-team/keras-cv/blob/master/keras_cv/layers/preprocessing/random_shear.py)
 
 ## Vectorization
 `BaseImageAugmentationLayer` requires you to implement augmentations in an
diff --git a/examples/layers/preprocessing/random_hue_demo.py b/examples/layers/preprocessing/random_hue_demo.py
new file mode 100644
index 0000000000..f5ea354dcc
--- /dev/null
+++ b/examples/layers/preprocessing/random_hue_demo.py
@@ -0,0 +1,54 @@
+# Copyright 2022 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""random_hue_demo.py shows how to use the RandomHue preprocessing layer.
+
+Operates on the oxford_flowers102 dataset. In this script the flowers
+are loaded, then are passed through the preprocessing layers.
+Finally, they are shown using matplotlib.
+"""
+import matplotlib.pyplot as plt
+import tensorflow as tf
+import tensorflow_datasets as tfds
+
+from keras_cv.layers import preprocessing
+
+IMG_SIZE = (224, 224)
+BATCH_SIZE = 64
+
+
+def resize(image, label):
+    image = tf.image.resize(image, IMG_SIZE)
+    return image, label
+
+
+def main():
+    data, ds_info = tfds.load("oxford_flowers102", with_info=True, as_supervised=True)
+    train_ds = data["train"]
+
+    train_ds = train_ds.map(lambda x, y: resize(x, y)).batch(BATCH_SIZE)
+    random_hue = preprocessing.RandomHue(factor=(0.0, 1.0))
+    train_ds = train_ds.map(
+        lambda x, y: (random_hue(x), y), num_parallel_calls=tf.data.AUTOTUNE
+    )
+
+    for images, labels in train_ds.take(1):
+        plt.figure(figsize=(8, 8))
+        for i in range(9):
+            plt.subplot(3, 3, i + 1)
+            plt.imshow(images[i].numpy().astype("uint8"))
+            plt.axis("off")
+        plt.show()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/layers/preprocessing/random_saturation_demo.py b/examples/layers/preprocessing/random_saturation_demo.py
new file mode 100644
index 0000000000..9abda29521
--- /dev/null
+++ b/examples/layers/preprocessing/random_saturation_demo.py
@@ -0,0 +1,55 @@
+# Copyright 2022 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""random_saturation_demo.py shows how to use the RandomSaturation preprocessing layer. + +Operates on the oxford_flowers102 dataset. In this script the flowers +are loaded, then are passed through the preprocessing layers. +Finally, they are shown using matplotlib. +""" +import matplotlib.pyplot as plt +import tensorflow as tf +import tensorflow_datasets as tfds + +from keras_cv.layers import preprocessing + +IMG_SIZE = (224, 224) +BATCH_SIZE = 64 + + +def resize(image, label): + image = tf.image.resize(image, IMG_SIZE) + return image, label + + +def main(): + data, ds_info = tfds.load("oxford_flowers102", with_info=True, as_supervised=True) + train_ds = data["train"] + + train_ds = train_ds.map(lambda x, y: resize(x, y)).batch(BATCH_SIZE) + random_saturation = preprocessing.RandomSaturation(factor=(0.0, 1.0)) + train_ds = train_ds.map( + lambda x, y: (random_saturation(x), y), num_parallel_calls=tf.data.AUTOTUNE + ) + + for images, labels in train_ds.take(1): + plt.figure(figsize=(8, 8)) + for i in range(9): + plt.subplot(3, 3, i + 1) + plt.imshow(images[i].numpy().astype("uint8")) + plt.axis("off") + plt.show() + + +if __name__ == "__main__": + main() diff --git a/keras_cv/layers/preprocessing/__init__.py b/keras_cv/layers/preprocessing/__init__.py index f2617724c0..9a7601f0fa 100644 --- a/keras_cv/layers/preprocessing/__init__.py +++ b/keras_cv/layers/preprocessing/__init__.py @@ -39,6 +39,8 @@ RandomColorDegeneration, ) from keras_cv.layers.preprocessing.random_cutout import RandomCutout +from keras_cv.layers.preprocessing.random_hue import RandomHue +from keras_cv.layers.preprocessing.random_saturation import RandomSaturation from keras_cv.layers.preprocessing.random_sharpness import RandomSharpness from keras_cv.layers.preprocessing.random_shear import RandomShear from keras_cv.layers.preprocessing.solarization import Solarization diff --git a/keras_cv/layers/preprocessing/auto_contrast.py b/keras_cv/layers/preprocessing/auto_contrast.py index ff5f23391c..55143a69b8 100644 --- a/keras_cv/layers/preprocessing/auto_contrast.py +++ b/keras_cv/layers/preprocessing/auto_contrast.py @@ -17,6 +17,7 @@ from keras_cv.utils import preprocessing +@tf.keras.utils.register_keras_serializable(package="keras_cv") class AutoContrast(tf.keras.__internal__.layers.BaseImageAugmentationLayer): """Performs the AutoContrast operation on an image. @@ -60,3 +61,8 @@ def augment_image(self, image, transformation=None): # don't process NaN channels result = tf.where(tf.math.is_nan(result), original_image, result) return result + + def get_config(self): + config = super().get_config() + config.update({"value_range": self.value_range}) + return config diff --git a/keras_cv/layers/preprocessing/channel_shuffle.py b/keras_cv/layers/preprocessing/channel_shuffle.py index b10b40d21a..effc159a7d 100644 --- a/keras_cv/layers/preprocessing/channel_shuffle.py +++ b/keras_cv/layers/preprocessing/channel_shuffle.py @@ -16,6 +16,7 @@ from tensorflow.keras import layers +@tf.keras.utils.register_keras_serializable(package="keras_cv") class ChannelShuffle(layers.Layer): """Shuffle channels of an input image. 
diff --git a/keras_cv/layers/preprocessing/cut_mix.py b/keras_cv/layers/preprocessing/cut_mix.py index 65ba3da2b0..a598f9cf77 100644 --- a/keras_cv/layers/preprocessing/cut_mix.py +++ b/keras_cv/layers/preprocessing/cut_mix.py @@ -19,6 +19,7 @@ from keras_cv.utils import fill_utils +@tf.keras.utils.register_keras_serializable(package="keras_cv") class CutMix(layers.Layer): """CutMix implements the CutMix data augmentation technique. diff --git a/keras_cv/layers/preprocessing/equalization.py b/keras_cv/layers/preprocessing/equalization.py index acb231b228..b2ecd6cdcc 100644 --- a/keras_cv/layers/preprocessing/equalization.py +++ b/keras_cv/layers/preprocessing/equalization.py @@ -13,13 +13,20 @@ # limitations under the License. import tensorflow as tf +from keras_cv.utils import preprocessing -class Equalization(tf.keras.layers.Layer): + +@tf.keras.utils.register_keras_serializable(package="keras_cv") +class Equalization(tf.keras.__internal__.layers.BaseImageAugmentationLayer): """Equalization performs histogram equalization on a channel-wise basis. Args: bins: Integer indicating the number of bins to use in histogram equalization. Should be in the range [0, 256] + value_range: a tuple or a list of two elements. The first value represents + the lower bound for values in passed images, the second represents the + upper bound. Images passed to the layer should have values within + `value_range`. Defaults to `(0, 255)`. Usage: ```python @@ -35,9 +42,10 @@ class Equalization(tf.keras.layers.Layer): of type float or int. Should be in NHWC format. """ - def __init__(self, bins=256, **kwargs): + def __init__(self, bins=256, value_range=(0, 255), **kwargs): super().__init__(**kwargs) self.bins = bins + self.value_range = value_range def equalize_channel(self, image, channel_index): """equalize_channel performs histogram equalization on a single channel. @@ -47,7 +55,6 @@ def equalize_channel(self, image, channel_index): with channels last channel_index: channel to equalize """ - dtype = image.dtype image = image[..., channel_index] # Compute the histogram of the image channel. histogram = tf.histogram_fixed_width(image, [0, 255], nbins=self.bins) @@ -74,18 +81,24 @@ def build_mapping(histogram, step): result = tf.cond( tf.equal(step, 0), lambda: image, - lambda: tf.gather(build_mapping(histogram, step), image), + lambda: tf.cast( + tf.gather(build_mapping(histogram, step), tf.cast(image, tf.int32)), + self.compute_dtype, + ), ) - return tf.cast(result, dtype) + return result - def call(self, images): - # Assumes RGB for now. Scales each channel independently - # and then stacks the result. - # TODO(lukewood): ideally this would be vectorized. 
- r = tf.map_fn(lambda x: self.equalize_channel(x, 0), images) - g = tf.map_fn(lambda x: self.equalize_channel(x, 1), images) - b = tf.map_fn(lambda x: self.equalize_channel(x, 2), images) + def augment_image(self, image, transformation=None): + image = preprocessing.transform_value_range(image, self.value_range, (0, 255)) + r = self.equalize_channel(image, 0) + g = self.equalize_channel(image, 1) + b = self.equalize_channel(image, 2) + image = tf.stack([r, g, b], axis=-1) + image = preprocessing.transform_value_range(image, (0, 255), self.value_range) + return image - images = tf.stack([r, g, b], axis=-1) - return images + def get_config(self): + config = super().get_config() + config.update({"bins": self.bins, "value_range": self.value_range}) + return config diff --git a/keras_cv/layers/preprocessing/grayscale.py b/keras_cv/layers/preprocessing/grayscale.py index 4281544482..e904fe9677 100644 --- a/keras_cv/layers/preprocessing/grayscale.py +++ b/keras_cv/layers/preprocessing/grayscale.py @@ -15,6 +15,7 @@ import tensorflow as tf +@tf.keras.utils.register_keras_serializable(package="keras_cv") class Grayscale(tf.keras.__internal__.layers.BaseImageAugmentationLayer): """Grayscale is a preprocessing layer that transforms RGB images to Grayscale images. Input images should have values in the range of [0, 255]. diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index 7463dc1b1a..75a38c5b6d 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -28,6 +28,7 @@ def _center_crop(mask, width, height): return tf.image.crop_to_bounding_box(mask, h_start, w_start, height, width) +@tf.keras.utils.register_keras_serializable(package="keras_cv") class GridMask(tf.keras.__internal__.layers.BaseImageAugmentationLayer): """GridMask class for grid-mask augmentation. @@ -95,12 +96,12 @@ def __init__( self.ratio = ratio.lower() self.fill_mode = fill_mode self.fill_value = fill_value + self.rotation_factor = rotation_factor self.random_rotate = layers.RandomRotation( factor=rotation_factor, fill_mode="constant", fill_value=0.0, seed=seed ) self.seed = seed self.auto_vectorize = True - self._check_parameter_values() def _check_parameter_values(self): diff --git a/keras_cv/layers/preprocessing/mix_up.py b/keras_cv/layers/preprocessing/mix_up.py index 64c3fe8b64..1165bd93b8 100644 --- a/keras_cv/layers/preprocessing/mix_up.py +++ b/keras_cv/layers/preprocessing/mix_up.py @@ -17,6 +17,7 @@ from tensorflow.keras import backend +@tf.keras.utils.register_keras_serializable(package="keras_cv") class MixUp(layers.Layer): """MixUp implements the MixUp data augmentation technique. diff --git a/keras_cv/layers/preprocessing/posterization.py b/keras_cv/layers/preprocessing/posterization.py index 46ec6dd5a0..a995507ab7 100644 --- a/keras_cv/layers/preprocessing/posterization.py +++ b/keras_cv/layers/preprocessing/posterization.py @@ -17,6 +17,7 @@ from keras_cv.utils.preprocessing import transform_value_range +@tf.keras.utils.register_keras_serializable(package="keras_cv") class Posterization(BaseImageAugmentationLayer): """Reduces the number of bits for each color channel. 
@@ -98,6 +99,6 @@ def _posterize(self, image): ) def get_config(self): - config = {"bits": 8 - self.shift, "value_range": self._value_range} + config = {"bits": 8 - self._shift, "value_range": self._value_range} base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) diff --git a/keras_cv/layers/preprocessing/random_color_degeneration.py b/keras_cv/layers/preprocessing/random_color_degeneration.py index 4af513087b..187440d929 100644 --- a/keras_cv/layers/preprocessing/random_color_degeneration.py +++ b/keras_cv/layers/preprocessing/random_color_degeneration.py @@ -16,6 +16,7 @@ from keras_cv.utils import preprocessing +@tf.keras.utils.register_keras_serializable(package="keras_cv") class RandomColorDegeneration(tf.keras.__internal__.layers.BaseImageAugmentationLayer): """Randomly performs the color degeneration operation on given images. @@ -45,7 +46,7 @@ def __init__( super().__init__(**kwargs) self.factor = preprocessing.parse_factor_value_range(factor) - def get_random_transformation(self): + def get_random_transformation(self, image=None, label=None, bounding_box=None): if self.factor[0] == self.factor[1]: return self.factor[0] return self._random_generator.random_uniform( @@ -56,3 +57,8 @@ def augment_image(self, image, transformation=None): degenerate = tf.image.grayscale_to_rgb(tf.image.rgb_to_grayscale(image)) result = preprocessing.blend(image, degenerate, transformation) return result + + def get_config(self): + config = super().get_config() + config.update({"factor": self.factor}) + return config diff --git a/keras_cv/layers/preprocessing/random_cutout.py b/keras_cv/layers/preprocessing/random_cutout.py index 7baa1baaef..b7b67226c6 100644 --- a/keras_cv/layers/preprocessing/random_cutout.py +++ b/keras_cv/layers/preprocessing/random_cutout.py @@ -12,13 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. import tensorflow as tf -import tensorflow.keras.layers as layers -from tensorflow.keras import backend from keras_cv.utils import fill_utils -class RandomCutout(layers.Layer): +@tf.keras.utils.register_keras_serializable(package="keras_cv") +class RandomCutout(tf.keras.__internal__.layers.BaseImageAugmentationLayer): """Randomly cut out rectangles from images and fill them. Args: @@ -46,12 +45,6 @@ class RandomCutout(layers.Layer): - *gaussian_noise*: Pixels are filled with random gaussian noise. fill_value: a float represents the value to be filled inside the patches when `fill_mode="constant"`. - num_cutouts: One of: - - An integer representing the absolute number of cutouts - - A tuple of size 2, representing the range for the number of cutouts. - For example, `num_cutouts=10` results in 10 cutouts. - `num_cutouts=(2,8)` results in num_cutouts between [2, 8]. Can be used - to implement coarse dropout. Defaults to 1. 
Sample usage: ```python @@ -67,7 +60,6 @@ def __init__( width_factor, fill_mode="constant", fill_value=0.0, - num_cutouts=1, seed=None, **kwargs, ): @@ -75,11 +67,9 @@ def __init__( self.height_lower, self.height_upper = self._parse_bounds(height_factor) self.width_lower, self.width_upper = self._parse_bounds(width_factor) - self.num_cutouts_lower, self.num_cutouts_upper = self._parse_bounds(num_cutouts) - self.num_cutouts_lower = ( - self.num_cutouts_lower if self.num_cutouts_lower != 0 else 1 - ) - self.num_cutouts_upper += 1 + self.fill_mode = fill_mode + self.fill_value = fill_value + self.seed = seed if fill_mode not in ["gaussian_noise", "constant"]: raise ValueError( @@ -101,13 +91,6 @@ def __init__( type(self.width_lower), type(self.width_upper) ) ) - if not isinstance(self.num_cutouts_lower, type(self.num_cutouts_upper)): - raise ValueError( - "`num_cutouts` must have lower bound and upper bound " - "with same type, got {} and {}".format( - type(self.num_cutouts_lower), type(self.num_cutouts_upper) - ) - ) if self.height_upper < self.height_lower: raise ValueError( @@ -135,56 +118,32 @@ def __init__( "when is float, got {}".format(width_factor) ) - if self.num_cutouts_upper < self.num_cutouts_lower: - raise ValueError( - "`num_cutouts` cannot have upper bound less than lower bound" - ) - if not isinstance(self.num_cutouts_upper, int): - raise ValueError( - "`num_cutouts` must be dtype int, got {}".format( - type(self.num_cutouts_upper) - ) - ) - if not isinstance(self.num_cutouts_lower, int): - raise ValueError( - "`num_cutouts` must be dtype int, got {}".format( - type(self.num_cutouts_lower) - ) - ) - - self.fill_mode = fill_mode - self.fill_value = fill_value - self.seed = seed - def _parse_bounds(self, factor): if isinstance(factor, (tuple, list)): return factor[0], factor[1] else: return type(factor)(0), factor - def call(self, inputs, training=True): - if training is None: - training = backend.learning_phase() - - augment = lambda: self._random_cutout(inputs) - no_augment = lambda: inputs - return tf.cond(tf.cast(training, tf.bool), augment, no_augment) + def get_random_transformation(self, image=None, label=None, bounding_box=None): + center_x, center_y = self._compute_rectangle_position(image) + rectangle_height, rectangle_width = self._compute_rectangle_size(image) + return center_x, center_y, rectangle_height, rectangle_width - def _random_cutout(self, inputs): + def augment_image(self, image, transformation=None): """Apply random cutout.""" - for _ in tf.range(self._sample_num_cutouts()): - center_x, center_y = self._compute_rectangle_position(inputs) - rectangle_height, rectangle_width = self._compute_rectangle_size(inputs) - rectangle_fill = self._compute_rectangle_fill(inputs) - inputs = fill_utils.fill_rectangle( - inputs, - center_x, - center_y, - rectangle_width, - rectangle_height, - rectangle_fill, - ) - return inputs + inputs = tf.expand_dims(image, 0) + center_x, center_y, rectangle_height, rectangle_width = transformation + + rectangle_fill = self._compute_rectangle_fill(inputs) + inputs = fill_utils.fill_rectangle( + inputs, + center_x, + center_y, + rectangle_width, + rectangle_height, + rectangle_fill, + ) + return inputs[0] def _compute_rectangle_position(self, inputs): input_shape = tf.shape(inputs) @@ -193,32 +152,14 @@ def _compute_rectangle_position(self, inputs): input_shape[1], input_shape[2], ) - center_x = tf.random.uniform( - shape=[batch_size], - minval=0, - maxval=image_width, - dtype=tf.int32, - seed=self.seed, + center_x = 
self._random_generator.random_uniform( + [batch_size], 0, image_width, dtype=tf.int32 ) - center_y = tf.random.uniform( - shape=[batch_size], - minval=0, - maxval=image_height, - dtype=tf.int32, - seed=self.seed, + center_y = self._random_generator.random_uniform( + [batch_size], 0, image_height, dtype=tf.int32 ) return center_x, center_y - def _sample_num_cutouts(self): - num_cutouts = tf.random.uniform( - shape=(1,), - minval=self.num_cutouts_lower, - maxval=self.num_cutouts_upper, - dtype=tf.int32, - seed=self.seed, - ) - return num_cutouts[0] - def _compute_rectangle_size(self, inputs): input_shape = tf.shape(inputs) batch_size, image_height, image_width = ( @@ -226,17 +167,11 @@ def _compute_rectangle_size(self, inputs): input_shape[1], input_shape[2], ) - height = tf.random.uniform( - [batch_size], - minval=self.height_lower, - maxval=self.height_upper, - dtype=tf.float32, + height = self._random_generator.random_uniform( + [batch_size], self.height_lower, self.height_upper, dtype=tf.float32 ) - width = tf.random.uniform( - [batch_size], - minval=self.width_lower, - maxval=self.width_upper, - dtype=tf.float32, + width = self._random_generator.random_uniform( + [batch_size], self.width_lower, self.width_upper, dtype=tf.float32 ) if self._height_is_float: @@ -265,11 +200,10 @@ def _compute_rectangle_fill(self, inputs): def get_config(self): config = { - "height_factor": self.height_factor, - "width_factor": self.width_factor, + "height_factor": (self.height_lower, self.height_upper), + "width_factor": (self.width_lower, self.width_upper), "fill_mode": self.fill_mode, "fill_value": self.fill_value, - "num_cutouts": self.num_cutouts, "seed": self.seed, } base_config = super().get_config() diff --git a/keras_cv/layers/preprocessing/random_cutout_test.py b/keras_cv/layers/preprocessing/random_cutout_test.py index f31307e2db..afc5e9cdc8 100644 --- a/keras_cv/layers/preprocessing/random_cutout_test.py +++ b/keras_cv/layers/preprocessing/random_cutout_test.py @@ -49,6 +49,14 @@ def test_return_shapes(self): self.assertEqual(xs.shape, [2, 512, 512, 3]) + def test_return_shapes_single_element(self): + xs = tf.ones((512, 512, 3)) + + layer = preprocessing.RandomCutout(height_factor=0.5, width_factor=0.5, seed=1) + xs = layer(xs) + + self.assertEqual(xs.shape, [512, 512, 3]) + def test_random_cutout_single_float(self): self._run_test(0.5, 0.5) diff --git a/keras_cv/layers/preprocessing/random_hue.py b/keras_cv/layers/preprocessing/random_hue.py new file mode 100644 index 0000000000..5ab93fbdee --- /dev/null +++ b/keras_cv/layers/preprocessing/random_hue.py @@ -0,0 +1,67 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import tensorflow as tf + +from keras_cv.utils import preprocessing + + +@tf.keras.utils.register_keras_serializable(package="keras_cv") +class RandomHue(tf.keras.__internal__.layers.BaseImageAugmentationLayer): + """Randomly adjusts the hue on given images. + + This layer will randomly increase/reduce the hue for the input RGB + images. 
At inference time, the output will be identical to the input.
+    Call the layer with `training=True` to adjust the hue of the input.
+
+    The image hue is adjusted by converting the image(s) to HSV and rotating the
+    hue channel (H) by delta. The image is then converted back to RGB.
+
+    Args:
+        factor: Either a tuple of two floats or a single float. `factor` controls the
+            extent to which the image hue is impacted. `factor=0.0`, `factor=0.5` or
+            `factor=1.0` makes this layer perform a no-op.
+            `factor=0.25` and `factor=0.75` give the image a fully opposite
+            hue. Values should be between `0.0` and `1.0`.
+            If a tuple is used, a `factor` is sampled
+            between the two values for every image augmented. If a single float is
+            used, a value between `0.0` and the passed float is sampled.
+            In order to ensure the value is always the same, please pass a tuple with
+            two identical floats: `(0.5, 0.5)`.
+    """
+
+    def __init__(self, factor, **kwargs):
+        super().__init__(**kwargs)
+        self.factor = preprocessing.parse_factor_value_range(
+            factor, min_value=0.0, max_value=1.0
+        )
+
+    def get_random_transformation(self, image=None, label=None, bounding_box=None):
+        del image, label, bounding_box
+        if self.factor[0] == self.factor[1]:
+            return self.factor[0]
+        return self._random_generator.random_uniform(
+            shape=(), minval=self.factor[0], maxval=self.factor[1], dtype=tf.float32
+        )
+
+    def augment_image(self, image, transformation=None):
+        # Convert the factor range from [0, 1] to [-1.0, 1.0].
+        adjust_factor = transformation * 2.0 - 1.0
+        return tf.image.adjust_hue(image, delta=adjust_factor)
+
+    def get_config(self):
+        config = {
+            "factor": self.factor,
+        }
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
diff --git a/keras_cv/layers/preprocessing/random_hue_test.py b/keras_cv/layers/preprocessing/random_hue_test.py
new file mode 100644
index 0000000000..3475cd2ca4
--- /dev/null
+++ b/keras_cv/layers/preprocessing/random_hue_test.py
@@ -0,0 +1,108 @@
+# Copyright 2022 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
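The factor-to-delta conversion in `augment_image` above, together with the periodicity of hue, is what makes `factor` values of `0.0`, `0.5` and `1.0` no-ops. A minimal sketch checking the mapping by hand (an illustration only, assuming nothing beyond TensorFlow and the values stated in the docstring):

```python
import tensorflow as tf

# RandomHue maps factor in [0, 1] linearly onto a hue delta in [-1, 1]:
#   delta = factor * 2.0 - 1.0
# factor 0.00 -> delta -1.0  (full rotation: identity)
# factor 0.25 -> delta -0.5  (opposite hue)
# factor 0.50 -> delta  0.0  (no-op)
# factor 0.75 -> delta  0.5  (opposite hue)
# factor 1.00 -> delta  1.0  (full rotation: identity)
image = tf.random.uniform((8, 8, 3))

# Hue is periodic with period 1.0, so a delta of +/-1.0 rotates all the way
# around the hue circle and leaves the image essentially unchanged.
tf.debugging.assert_near(image, tf.image.adjust_hue(image, delta=0.0), atol=1e-4)
tf.debugging.assert_near(image, tf.image.adjust_hue(image, delta=1.0), atol=1e-4)
```

The `test_adjust_no_op` and `test_adjust_full_opposite_hue` cases below assert exactly these properties on the layer itself.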
+import tensorflow as tf
+
+from keras_cv.layers import preprocessing
+
+
+class RandomHueTest(tf.test.TestCase):
+    def test_preserves_output_shape(self):
+        image_shape = (4, 8, 8, 3)
+        image = tf.random.uniform(shape=image_shape) * 255.0
+
+        layer = preprocessing.RandomHue(factor=(0.3, 0.8))
+        output = layer(image)
+
+        self.assertEqual(image.shape, output.shape)
+        self.assertNotAllClose(image, output)
+
+    def test_adjust_no_op(self):
+        image_shape = (4, 8, 8, 3)
+        image = tf.random.uniform(shape=image_shape) * 255.0
+
+        layer = preprocessing.RandomHue(factor=(0.5, 0.5))
+        output = layer(image)
+        self.assertAllClose(image, output, atol=1e-5, rtol=1e-5)
+
+        layer = preprocessing.RandomHue(factor=(0.0, 0.0))
+        output = layer(image)
+        self.assertAllClose(image, output, atol=1e-5, rtol=1e-5)
+
+        layer = preprocessing.RandomHue(factor=(1.0, 1.0))
+        output = layer(image)
+        self.assertAllClose(image, output, atol=1e-5, rtol=1e-5)
+
+    def test_adjust_full_opposite_hue(self):
+        image_shape = (4, 8, 8, 3)
+        image = tf.random.uniform(shape=image_shape) * 255.0
+
+        layer = preprocessing.RandomHue(factor=(0.25, 0.25))
+        output = layer(image)
+
+        channel_max = tf.math.reduce_max(output, axis=-1)
+        channel_min = tf.math.reduce_min(output, axis=-1)
+        # Make sure the per-pixel max and min channel values are the same
+        # between input and output, while the channels themselves swap around.
+        self.assertAllClose(
+            channel_max, tf.math.reduce_max(image, axis=-1), atol=1e-5, rtol=1e-5
+        )
+        self.assertAllClose(
+            channel_min, tf.math.reduce_min(image, axis=-1), atol=1e-5, rtol=1e-5
+        )
+
+        layer = preprocessing.RandomHue(factor=(0.75, 0.75))
+        output = layer(image)
+
+        channel_max = tf.math.reduce_max(output, axis=-1)
+        channel_min = tf.math.reduce_min(output, axis=-1)
+        self.assertAllClose(
+            channel_max, tf.math.reduce_max(image, axis=-1), atol=1e-5, rtol=1e-5
+        )
+        self.assertAllClose(
+            channel_min, tf.math.reduce_min(image, axis=-1), atol=1e-5, rtol=1e-5
+        )
+
+    def test_adjustment_for_non_rgb_value_range(self):
+        image_shape = (4, 8, 8, 3)
+        # Value range (0, 100)
+        image = tf.random.uniform(shape=image_shape) * 100.0
+
+        layer = preprocessing.RandomHue(factor=(0.5, 0.5))
+        output = layer(image)
+        self.assertAllClose(image, output, atol=1e-5, rtol=1e-5)
+
+        layer = preprocessing.RandomHue(factor=(0.3, 0.8))
+        output = layer(image)
+        self.assertNotAllClose(image, output)
+
+    def test_with_uint8(self):
+        image_shape = (4, 8, 8, 3)
+        image = tf.cast(tf.random.uniform(shape=image_shape) * 255.0, dtype=tf.uint8)
+
+        layer = preprocessing.RandomHue(factor=(0.5, 0.5))
+        output = layer(image)
+        self.assertAllClose(image, output, atol=1e-5, rtol=1e-5)
+
+        layer = preprocessing.RandomHue(factor=(0.3, 0.8))
+        output = layer(image)
+        self.assertNotAllClose(image, output)
+
+    def test_config(self):
+        layer = preprocessing.RandomHue(factor=(0.3, 0.8))
+        config = layer.get_config()
+        self.assertEqual(config["factor"], (0.3, 0.8))
+
+        layer = preprocessing.RandomHue(factor=0.5)
+        config = layer.get_config()
+        self.assertEqual(config["factor"], (0.0, 0.5))
diff --git a/keras_cv/layers/preprocessing/random_saturation.py b/keras_cv/layers/preprocessing/random_saturation.py
new file mode 100644
index 0000000000..fe1fa6f24f
--- /dev/null
+++ b/keras_cv/layers/preprocessing/random_saturation.py
@@ -0,0 +1,77 @@
+# Copyright 2022 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import tensorflow as tf
+
+from keras_cv.utils import preprocessing
+
+
+@tf.keras.utils.register_keras_serializable(package="keras_cv")
+class RandomSaturation(tf.keras.__internal__.layers.BaseImageAugmentationLayer):
+    """Randomly adjusts the saturation on given images.
+
+    This layer will randomly increase/reduce the saturation for the input RGB
+    images. At inference time, the output will be identical to the input.
+    Call the layer with `training=True` to adjust the saturation of the input.
+
+    Args:
+        factor: Either a tuple of two floats or a single float. `factor` controls the
+            extent to which the image saturation is impacted. `factor=0.5` makes
+            this layer perform a no-op. `factor=0.0` makes the image
+            fully grayscale. `factor=1.0` makes the image fully saturated.
+
+            Values should be between `0.0` and `1.0`. If a tuple is used, a `factor`
+            is sampled between the two values for every image augmented. If a single
+            float is used, a value between `0.0` and the passed float is sampled.
+            In order to ensure the value is always the same, please pass a tuple with
+            two identical floats: `(0.5, 0.5)`.
+    """
+
+    def __init__(self, factor, **kwargs):
+        super().__init__(**kwargs)
+        self.factor = preprocessing.parse_factor_value_range(
+            factor, min_value=0.0, max_value=1.0
+        )
+
+    def get_random_transformation(self, image=None, label=None, bounding_box=None):
+        del image, label, bounding_box
+        if self.factor[0] == self.factor[1]:
+            return self.factor[0]
+        return self._random_generator.random_uniform(
+            shape=(), minval=self.factor[0], maxval=self.factor[1], dtype=tf.float32
+        )
+
+    def augment_image(self, image, transformation=None):
+        # Convert the factor range from [0, 1] to [0, +inf]. Note that
+        # tf.image.adjust_saturation applies the formula
+        # `output_saturation = input_saturation * factor`, so we map our factor
+        # onto it with `y = x / (1 - x)`.
+        # This ensures:
+        #   y = +inf when x = 1 (full saturation)
+        #   y = 1 when x = 0.5 (no augmentation)
+        #   y = 0 when x = 0 (full gray scale)
+
+        # Convert the transformation to a tensor in case it is a float. A plain
+        # Python float of 1.0 would raise a divide-by-zero error, whereas the
+        # division is handled gracefully (yielding inf) when it is a tensor.
+        transformation = tf.convert_to_tensor(transformation)
+        adjust_factor = transformation / (1 - transformation)
+        return tf.image.adjust_saturation(image, saturation_factor=adjust_factor)
+
+    def get_config(self):
+        config = {
+            "factor": self.factor,
+        }
+        base_config = super().get_config()
+        return dict(list(base_config.items()) + list(config.items()))
diff --git a/keras_cv/layers/preprocessing/random_saturation_test.py b/keras_cv/layers/preprocessing/random_saturation_test.py
new file mode 100644
index 0000000000..07b410041e
--- /dev/null
+++ b/keras_cv/layers/preprocessing/random_saturation_test.py
@@ -0,0 +1,96 @@
+# Copyright 2022 The KerasCV Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import tensorflow as tf
+
+from keras_cv.layers import preprocessing
+
+
+class RandomSaturationTest(tf.test.TestCase):
+    def test_preserves_output_shape(self):
+        image_shape = (4, 8, 8, 3)
+        image = tf.random.uniform(shape=image_shape) * 255.0
+
+        layer = preprocessing.RandomSaturation(factor=(0.3, 0.8))
+        output = layer(image)
+
+        self.assertEqual(image.shape, output.shape)
+        self.assertNotAllClose(image, output)
+
+    def test_no_adjustment_for_factor_point_five(self):
+        image_shape = (4, 8, 8, 3)
+        image = tf.random.uniform(shape=image_shape) * 255.0
+
+        layer = preprocessing.RandomSaturation(factor=(0.5, 0.5))
+        output = layer(image)
+
+        self.assertAllClose(image, output, atol=1e-5, rtol=1e-5)
+
+    def test_adjust_to_grayscale(self):
+        image_shape = (4, 8, 8, 3)
+        image = tf.random.uniform(shape=image_shape) * 255.0
+
+        layer = preprocessing.RandomSaturation(factor=(0.0, 0.0))
+        output = layer(image)
+
+        channel_mean = tf.math.reduce_mean(output, axis=-1)
+        channel_values = tf.unstack(output, axis=-1)
+        # Make sure all pixels have the same value across the channel dim, i.e.
+        # the output is fully gray RGB.
+        for channel_value in channel_values:
+            self.assertAllClose(channel_mean, channel_value, atol=1e-5, rtol=1e-5)
+
+    def test_adjust_to_full_saturation(self):
+        image_shape = (4, 8, 8, 3)
+        image = tf.random.uniform(shape=image_shape) * 255.0
+
+        layer = preprocessing.RandomSaturation(factor=(1.0, 1.0))
+        output = layer(image)
+
+        channel_min = tf.math.reduce_min(output, axis=-1)
+        # Make sure at least one of the channels is 0.0 (fully saturated image).
+        self.assertAllClose(channel_min, tf.zeros((4, 8, 8)))
+
+    def test_adjustment_for_non_rgb_value_range(self):
+        image_shape = (4, 8, 8, 3)
+        # Value range (0, 100)
+        image = tf.random.uniform(shape=image_shape) * 100.0
+
+        layer = preprocessing.RandomSaturation(factor=(0.5, 0.5))
+        output = layer(image)
+        self.assertAllClose(image, output, atol=1e-5, rtol=1e-5)
+
+        layer = preprocessing.RandomSaturation(factor=(0.3, 0.8))
+        output = layer(image)
+        self.assertNotAllClose(image, output)
+
+    def test_with_uint8(self):
+        image_shape = (4, 8, 8, 3)
+        image = tf.cast(tf.random.uniform(shape=image_shape) * 255.0, dtype=tf.uint8)
+
+        layer = preprocessing.RandomSaturation(factor=(0.5, 0.5))
+        output = layer(image)
+        self.assertAllClose(image, output, atol=1e-5, rtol=1e-5)
+
+        layer = preprocessing.RandomSaturation(factor=(0.3, 0.8))
+        output = layer(image)
+        self.assertNotAllClose(image, output)
+
+    def test_config(self):
+        layer = preprocessing.RandomSaturation(factor=(0.3, 0.8))
+        config = layer.get_config()
+        self.assertEqual(config["factor"], (0.3, 0.8))
+
+        layer = preprocessing.RandomSaturation(factor=0.5)
+        config = layer.get_config()
+        self.assertEqual(config["factor"], (0.0, 0.5))
diff --git a/keras_cv/layers/preprocessing/random_sharpness.py b/keras_cv/layers/preprocessing/random_sharpness.py
index 7fc02dc54a..7b46245ba0 100644
--- a/keras_cv/layers/preprocessing/random_sharpness.py
+++ b/keras_cv/layers/preprocessing/random_sharpness.py
@@ -16,6 +16,7 @@
 from keras_cv.utils import preprocessing
 
 
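The `y = x / (1 - x)` mapping used by `RandomSaturation.augment_image` above can be sanity-checked numerically. A minimal sketch, assuming only TensorFlow:

```python
import tensorflow as tf

# RandomSaturation maps its factor x in [0, 1] onto the multiplicative
# saturation factor y in [0, +inf] expected by tf.image.adjust_saturation:
#   y = x / (1 - x)
xs = tf.constant([0.0, 0.25, 0.5, 0.75, 1.0])
ys = xs / (1.0 - xs)
print(ys.numpy())  # [0.0, 0.33333334, 1.0, 3.0, inf]
# x = 0.0 -> y = 0    (fully grayscale)
# x = 0.5 -> y = 1    (no-op: saturation multiplied by 1)
# x = 1.0 -> y = inf  (fully saturated; the tensor division by zero yields
#                      inf instead of raising, which is why the layer converts
#                      the float transformation to a tensor first)
```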
+@tf.keras.utils.register_keras_serializable(package="keras_cv") class RandomSharpness(tf.keras.__internal__.layers.BaseImageAugmentationLayer): """Randomly performs the sharpness operation on given images. @@ -55,7 +56,7 @@ def __init__( self.value_range = value_range self.factor = preprocessing.parse_factor_value_range(factor) - def get_random_transformation(self): + def get_random_transformation(self, image=None, label=None, bounding_box=None): if self.factor[0] == self.factor[1]: return self.factor[0] return self._random_generator.random_uniform( @@ -110,3 +111,8 @@ def augment_image(self, image, transformation=None): result, original_range=(0, 255), target_range=self.value_range ) return result + + def get_config(self): + config = super().get_config() + config.update({"factor": self.factor, "value_range": self.value_range}) + return config diff --git a/keras_cv/layers/preprocessing/random_shear.py b/keras_cv/layers/preprocessing/random_shear.py index 8fd643c1d2..9f8c00ba1b 100644 --- a/keras_cv/layers/preprocessing/random_shear.py +++ b/keras_cv/layers/preprocessing/random_shear.py @@ -18,6 +18,7 @@ from keras_cv.utils import preprocessing +@tf.keras.utils.register_keras_serializable(package="keras_cv") class RandomShear(tf.keras.__internal__.layers.BaseImageAugmentationLayer): """Randomly shears an image. @@ -70,7 +71,7 @@ def __init__( self.fill_mode = fill_mode self.fill_value = fill_value - def get_random_transformation(self): + def get_random_transformation(self, image=None, label=None, bounding_box=None): x = self._get_shear_amount(self.x) y = self._get_shear_amount(self.y) return (x, y) @@ -121,3 +122,16 @@ def augment_image(self, image, transformation=None): def _format_transform(transform): transform = tf.convert_to_tensor(transform, dtype=tf.float32) return transform[tf.newaxis] + + def get_config(self): + config = super().get_config() + config.update( + { + "x": self.x, + "y": self.y, + "interpolation": self.interpolation, + "fill_mode": self.fill_mode, + "fill_value": self.fill_value, + } + ) + return config diff --git a/keras_cv/layers/preprocessing/serialization_test.py b/keras_cv/layers/preprocessing/serialization_test.py new file mode 100644 index 0000000000..528236e35d --- /dev/null +++ b/keras_cv/layers/preprocessing/serialization_test.py @@ -0,0 +1,52 @@ +# Copyright 2022 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import tensorflow as tf +from absl.testing import parameterized + +from keras_cv.layers import preprocessing + + +class SerializationTest(tf.test.TestCase, parameterized.TestCase): + @parameterized.named_parameters( + ("AutoContrast", preprocessing.AutoContrast, {}), + ("ChannelShuffle", preprocessing.ChannelShuffle, {}), + ("CutMix", preprocessing.CutMix, {}), + ("Equalization", preprocessing.Equalization, {}), + ("Grayscale", preprocessing.Grayscale, {}), + ("GridMask", preprocessing.GridMask, {}), + ("MixUp", preprocessing.MixUp, {}), + ("Posterization", preprocessing.Posterization, {"bits": 3}), + ( + "RandomColorDegeneration", + preprocessing.RandomColorDegeneration, + {"factor": 0.5}, + ), + ( + "RandomCutout", + preprocessing.RandomCutout, + {"height_factor": 0.2, "width_factor": 0.2}, + ), + ("RandomHue", preprocessing.RandomHue, {"factor": 0.5}), + ("RandomSaturation", preprocessing.RandomSaturation, {"factor": 0.5}), + ("RandomSharpness", preprocessing.RandomSharpness, {"factor": 0.5}), + ("RandomShear", preprocessing.RandomShear, {"x": 0.3, "y": 0.3}), + ("Solarization", preprocessing.Solarization, {}), + ) + def test_layer_serialization(self, layer_cls, init_args): + layer = layer_cls(**init_args) + model = tf.keras.models.Sequential(layer) + model_config = model.get_config() + reconstructed_model = tf.keras.Sequential().from_config(model_config) + reconstructed_layer = reconstructed_model.layers[0] + self.assertEqual(layer.get_config(), reconstructed_layer.get_config()) diff --git a/keras_cv/layers/preprocessing/solarization.py b/keras_cv/layers/preprocessing/solarization.py index eaee108177..e1e1eb08aa 100644 --- a/keras_cv/layers/preprocessing/solarization.py +++ b/keras_cv/layers/preprocessing/solarization.py @@ -16,6 +16,7 @@ from keras_cv.utils import preprocessing +@tf.keras.utils.register_keras_serializable(package="keras_cv") class Solarization(tf.keras.__internal__.layers.BaseImageAugmentationLayer): """Applies (max_value - pixel + min_value) for each pixel in the image. @@ -58,13 +59,8 @@ class Solarization(tf.keras.__internal__.layers.BaseImageAugmentationLayer): or [height, width, channels]. 
""" - def __init__( - self, - addition=0.0, - threshold=0.0, - value_range=(0, 255), - ): - super().__init__() + def __init__(self, addition=0.0, threshold=0.0, value_range=(0, 255), **kwargs): + super().__init__(**kwargs) self.addition = addition self.threshold = threshold self.value_range = value_range From a92611554db9aceff9359a831087c1d242cf0e18 Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Wed, 30 Mar 2022 19:56:40 +0200 Subject: [PATCH 41/43] GridMask to BaseImageAugmentationLayer --- keras_cv/layers/preprocessing/grid_mask.py | 43 +++++++++++++--------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index 75a38c5b6d..ade892c082 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -101,7 +101,7 @@ def __init__( factor=rotation_factor, fill_mode="constant", fill_value=0.0, seed=seed ) self.seed = seed - self.auto_vectorize = True + self.auto_vectorize = False self._check_parameter_values() def _check_parameter_values(self): @@ -132,11 +132,24 @@ def _check_parameter_values(self): def get_random_transformation(self, image=None, label=None, bounding_box=None): if self.ratio == "random": - return tf.random.uniform( + ratio = tf.random.uniform( shape=(), minval=0, maxval=1, dtype=tf.float32, seed=self.seed ) else: - return self.ratio + ratio = self.ratio + + # compute grid mask + input_shape = tf.shape(image) + mask = self._compute_grid_mask(input_shape, ratio=ratio) + + # convert mask to single-channel images + mask = tf.cast(mask, tf.float32) + mask = tf.expand_dims(mask, axis=-1) + + # randomly rotate mask + mask = self.random_rotate(mask) + + return mask def _compute_grid_mask(self, input_shape, ratio): height = tf.cast(input_shape[0], tf.float32) @@ -156,8 +169,12 @@ def _compute_grid_mask(self, input_shape, ratio): rectangle_side_len = tf.cast((1 - ratio) * unit_size, tf.float32) # sample x and y offset for grid units randomly between 0 and unit_size - delta_x = tf.random.uniform(shape=(), minval=0, maxval=unit_size, dtype=tf.float32) - delta_y = tf.random.uniform(shape=(), minval=0, maxval=unit_size, dtype=tf.float32) + delta_x = tf.random.uniform( + shape=(), minval=0, maxval=unit_size, dtype=tf.float32 + ) + delta_y = tf.random.uniform( + shape=(), minval=0, maxval=unit_size, dtype=tf.float32 + ) # grid size (number of diagonal units in grid) grid_size = mask_side_len // unit_size + 1 @@ -182,25 +199,17 @@ def _compute_grid_mask(self, input_shape, ratio): corners = tf.stack([x0, y0, x1, y1], axis=-1) mask_side_len = tf.cast(mask_side_len, tf.int32) - rectangle_masks = fill_utils.corners_to_mask(corners, mask_shape=(mask_side_len, mask_side_len)) + rectangle_masks = fill_utils.corners_to_mask( + corners, mask_shape=(mask_side_len, mask_side_len) + ) grid_mask = tf.reduce_any(rectangle_masks, axis=0) return grid_mask def augment_image(self, image, transformation=None): - ratio = transformation + mask = transformation input_shape = tf.shape(image) - # compute grid masks - mask = self._compute_grid_mask(input_shape, ratio) - - # convert mask to single-channel images - mask = tf.cast(mask, tf.float32) - mask = tf.expand_dims(mask, axis=-1) - - # randomly rotate mask - mask = self.random_rotate(mask) - # center crop mask input_height = input_shape[0] input_width = input_shape[1] From 5287f7fa6fc727505382023fe133efbdce1a0193 Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Fri, 1 Apr 2022 00:03:04 +0200 Subject: 
[PATCH 42/43] Apply changes from review --- keras_cv/layers/preprocessing/grid_mask.py | 47 ++++++++++++---------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index ade892c082..f48b5d0cb4 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -87,7 +87,6 @@ def __init__( rotation_factor=0.15, fill_mode="constant", fill_value=0.0, - seed=None, **kwargs, ): super().__init__(**kwargs) @@ -98,9 +97,11 @@ def __init__( self.fill_value = fill_value self.rotation_factor = rotation_factor self.random_rotate = layers.RandomRotation( - factor=rotation_factor, fill_mode="constant", fill_value=0.0, seed=seed + factor=rotation_factor, + fill_mode="constant", + fill_value=0.0, + seed=self._random_generator._seed, ) - self.seed = seed self.auto_vectorize = False self._check_parameter_values() @@ -132,8 +133,8 @@ def _check_parameter_values(self): def get_random_transformation(self, image=None, label=None, bounding_box=None): if self.ratio == "random": - ratio = tf.random.uniform( - shape=(), minval=0, maxval=1, dtype=tf.float32, seed=self.seed + ratio = self._random_generator.random_uniform( + shape=(), minval=0.0, maxval=1.0, dtype=tf.float32 ) else: ratio = self.ratio @@ -142,25 +143,34 @@ def get_random_transformation(self, image=None, label=None, bounding_box=None): input_shape = tf.shape(image) mask = self._compute_grid_mask(input_shape, ratio=ratio) - # convert mask to single-channel images + # convert mask to single-channel image mask = tf.cast(mask, tf.float32) mask = tf.expand_dims(mask, axis=-1) # randomly rotate mask mask = self.random_rotate(mask) - return mask + # compute fill + if self.fill_mode == "constant": + fill_value = tf.fill(input_shape, self.fill_value) + else: + # gaussian noise + fill_value = self._random_generator.random_normal( + shape=input_shape, dtype=image.dtype + ) + + return mask, fill_value def _compute_grid_mask(self, input_shape, ratio): height = tf.cast(input_shape[0], tf.float32) width = tf.cast(input_shape[1], tf.float32) - # masks side length + # mask side length input_diagonal_len = tf.sqrt(tf.square(width) + tf.square(height)) mask_side_len = tf.math.ceil(input_diagonal_len) # grid unit size - unit_size = tf.random.uniform( + unit_size = self._random_generator.random_uniform( shape=(), minval=tf.math.minimum(height * 0.5, width * 0.3), maxval=tf.math.maximum(height * 0.5, width * 0.3) + 1, @@ -169,11 +179,11 @@ def _compute_grid_mask(self, input_shape, ratio): rectangle_side_len = tf.cast((1 - ratio) * unit_size, tf.float32) # sample x and y offset for grid units randomly between 0 and unit_size - delta_x = tf.random.uniform( - shape=(), minval=0, maxval=unit_size, dtype=tf.float32 + delta_x = self._random_generator.random_uniform( + shape=(), minval=0.0, maxval=unit_size, dtype=tf.float32 ) - delta_y = tf.random.uniform( - shape=(), minval=0, maxval=unit_size, dtype=tf.float32 + delta_y = self._random_generator.random_uniform( + shape=(), minval=0.0, maxval=unit_size, dtype=tf.float32 ) # grid size (number of diagonal units in grid) @@ -197,6 +207,7 @@ def _compute_grid_mask(self, input_shape, ratio): x1 = tf.reshape(x1, [-1]) y1 = tf.reshape(y1, [-1]) + # convert coordinates to mask corners = tf.stack([x0, y0, x1, y1], axis=-1) mask_side_len = tf.cast(mask_side_len, tf.int32) rectangle_masks = fill_utils.corners_to_mask( @@ -207,7 +218,7 @@ def _compute_grid_mask(self, input_shape, ratio): return 
grid_mask def augment_image(self, image, transformation=None): - mask = transformation + mask, fill_value = transformation input_shape = tf.shape(image) # center crop mask @@ -218,13 +229,6 @@ def augment_image(self, image, transformation=None): # convert back to boolean mask mask = tf.cast(mask, tf.bool) - # fill - if self.fill_mode == "constant": - fill_value = tf.fill(input_shape, self.fill_value) - else: - # gaussian noise - fill_value = tf.random.normal(input_shape) - return tf.where(mask, fill_value, image) def get_config(self): @@ -233,7 +237,6 @@ def get_config(self): "rotation_factor": self.rotation_factor, "fill_mode": self.fill_mode, "fill_value": self.fill_value, - "seed": self.seed, } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) From e4debc248a9b2df53ad99bab5ff5b6d8e9244ff7 Mon Sep 17 00:00:00 2001 From: Christoffer Hjort Date: Fri, 1 Apr 2022 00:27:31 +0200 Subject: [PATCH 43/43] fix test --- keras_cv/utils/fill_utils_test.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/keras_cv/utils/fill_utils_test.py b/keras_cv/utils/fill_utils_test.py index 0d48d0f7be..7061aa419b 100644 --- a/keras_cv/utils/fill_utils_test.py +++ b/keras_cv/utils/fill_utils_test.py @@ -138,29 +138,29 @@ def test_width_out_of_upper_bound(self): def test_height_out_of_lower_bound(self): expected = tf.constant( [ - [1, 1, 0, 0, 0, 0], - [1, 1, 0, 0, 0, 0], - [1, 1, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], ] ) - corners = tf.constant([[-2, -2, 2, 3]], dtype=tf.float32) + corners = tf.constant([[1, -3, 4, 2]], dtype=tf.float32) self._run_test(corners, expected) def test_height_out_of_upper_bound(self): expected = tf.constant( [ - [0, 0, 0, 0, 1, 1], - [0, 0, 0, 0, 1, 1], - [0, 0, 0, 0, 1, 1], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 0, 0], ] ) - corners = tf.constant([[4, 0, 8, 3]], dtype=tf.float32) + corners = tf.constant([[1, 4, 4, 9]], dtype=tf.float32) self._run_test(corners, expected) def test_start_out_of_upper_bound(self):
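The updated expectations in the two `fill_utils_test` cases above encode the corner convention being tested: each box is `(x0, y0, x1, y1)`, filled over the half-open ranges `[x0, x1)` and `[y0, y1)` and clipped to the mask bounds. A minimal reference sketch of that convention (an illustration only; the real `fill_utils.corners_to_mask` is vectorized TensorFlow and may differ in detail):

```python
import tensorflow as tf


def corners_to_mask_reference(corners, mask_shape):
    """Fill (x0, y0, x1, y1) boxes into boolean masks, clipped to the bounds."""
    height, width = mask_shape
    masks = []
    for x0, y0, x1, y1 in corners.numpy().astype(int):
        rows = tf.range(height)
        cols = tf.range(width)
        # Half-open ranges, clipped to [0, height) and [0, width).
        in_rows = (rows >= max(y0, 0)) & (rows < min(y1, height))
        in_cols = (cols >= max(x0, 0)) & (cols < min(x1, width))
        masks.append(in_rows[:, None] & in_cols[None, :])
    return tf.stack(masks)


# Mirrors test_height_out_of_lower_bound above: y0 = -3 clips to 0, so only
# rows 0-1 and columns 1-3 of the 6x6 mask are filled.
corners = tf.constant([[1, -3, 4, 2]], dtype=tf.float32)
print(corners_to_mask_reference(corners, (6, 6)).numpy().astype(int)[0])
```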