From 1606c6d772c14a2990774d1916e1d823efd75707 Mon Sep 17 00:00:00 2001
From: MaximilianTUB
Date: Fri, 10 Nov 2023 16:33:25 +0100
Subject: [PATCH 01/15] Input-Output Relation for Bottleneck Block

---
 scripts/nn/networks/resnet.dml | 74 +++++++++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 2 deletions(-)

diff --git a/scripts/nn/networks/resnet.dml b/scripts/nn/networks/resnet.dml
index a7a62cb2224..48a09694f23 100644
--- a/scripts/nn/networks/resnet.dml
+++ b/scripts/nn/networks/resnet.dml
@@ -75,14 +75,14 @@ basic_block_forward = function(matrix[double] X, list[unknown] weights,
  *     -> 5: Weights of batch norm 2, of shape (C_base, 1).
  *     -> 6: Bias of batch norm 2, of shape (C_base, 1).
  *     If the block should downsample X:
- *     -> 7: Weights of downsample conv, of shape (C_base, C_in*3*3).
+ *     -> 7: Weights of downsample conv, of shape (C_base, C_in*1*1).
  *     -> 8: Weights of downsample batch norm, of shape (C_base, 1).
  *     -> 9: Bias of downsample batch norm, of shape (C_base, 1).
  * - C_in: Number of input channels.
  * - C_base: Number of base channels for this block.
  * - Hin: Input height.
  * - Win: Input width.
- * - strideh: Stride over height (usually 1 or 2)..
+ * - strideh: Stride over height (usually 1 or 2).
  * - stridew: Stride over width (usually same as strideh).
  * - mode: 'train' or 'test' to indicate if the model is currently
  *     being trained or tested for batch normalization layers.
@@ -170,6 +170,76 @@ basic_block_forward = function(matrix[double] X, list[unknown] weights,
   }
 }
 
+bottleneck_block_forward = function(matrix[double] X,
+    list[unknown] weights, int C_in, int C_base, int Hin,
+    int Win, int strideh, int stridew, string mode,
+    list[unknown] ema_means_vars)
+  return (matrix[double] out, int Hout, int Wout,
+      list[unknown] ema_means_vars_upd) {
+  /*
+   * Computes the forward pass for a bottleneck residual
+   * block.
+   * This residual block architecture is used in the
+   * larger ResNets. It consists of 3 convolutional
+   * layers - 1x1, 3x3, 1x1. The last layer increases
+   * the number of channels by a factor of 4, which the
+   * first layer of the next res block scales back down
+   * to keep the computational complexity low.
+   * The downsampling of the image dimensions (Hin & Win)
+   * through the stride is placed at the 3x3 conv layer
+   * instead of at the first 1x1 layer (as proposed in
+   * "Deep Residual Learning for Image Recognition");
+   * this variant is called ResNet V1.5 and was introduced in
+   * https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
+   *
+   * Inputs:
+   * - X: Inputs, of shape (N, C_in*Hin*Win).
+   * - weights: List of weights for all layers of the res block
+   *     with the following order/content:
+   *     -> 1: Weights of conv 1, of shape (C_base, C_in*1*1).
+   *     -> 2: Weights of batch norm 1, of shape (C_base, 1).
+   *     -> 3: Bias of batch norm 1, of shape (C_base, 1).
+   *     -> 4: Weights of conv 2, of shape (C_base, C_base*3*3).
+   *     -> 5: Weights of batch norm 2, of shape (C_base, 1).
+   *     -> 6: Bias of batch norm 2, of shape (C_base, 1).
+   *     -> 7: Weights of conv 3, of shape (4*C_base, C_base*1*1).
+   *     -> 8: Weights of batch norm 3, of shape (4*C_base, 1).
+   *     -> 9: Bias of batch norm 3, of shape (4*C_base, 1).
+   *     If the block should downsample X:
+   *     -> 10: Weights of downsample conv, of shape (4*C_base, C_in*1*1).
+   *     -> 11: Weights of downsample batch norm, of shape (4*C_base, 1).
+   *     -> 12: Bias of downsample batch norm, of shape (4*C_base, 1).
+   * - C_in: Number of input channels.
+   * - C_base: Number of base channels for this block.
+   * - Hin: Input height.
+   * - Win: Input width.
+   * - strideh: Stride over height (usually 1 or 2).
+   * - stridew: Stride over width (usually same as strideh).
+   * - mode: 'train' or 'test' to indicate if the model is currently
+   *     being trained or tested for batch normalization layers.
+   *     See batch_norm2d.dml docs for more info.
+   * - ema_means_vars: List of exponential moving averages for mean
+   *     and variance for batch normalization layers.
+   *     -> 1: EMA for mean of batch norm 1, of shape (C_base, 1).
+   *     -> 2: EMA for variance of batch norm 1, of shape (C_base, 1).
+   *     -> 3: EMA for mean of batch norm 2, of shape (C_base, 1).
+   *     -> 4: EMA for variance of batch norm 2, of shape (C_base, 1).
+   *     -> 5: EMA for mean of batch norm 3, of shape (4*C_base, 1).
+   *     -> 6: EMA for variance of batch norm 3, of shape (4*C_base, 1).
+   *     If the block should downsample X:
+   *     -> 7: EMA for mean of downs. batch norm, of shape (4*C_base, 1).
+   *     -> 8: EMA for variance of downs. batch norm, of shape (4*C_base, 1).
+   *
+   * Outputs:
+   * - out: Output, of shape (N, 4*C_base*Hout*Wout).
+   * - Hout: Output height.
+   * - Wout: Output width.
+   * - ema_means_vars_upd: List of updated exponential moving averages
+   *     for mean and variance of batch normalization layers.
+   */
+}
+
 basic_reslayer_forward = function(matrix[double] X, int Hin, int Win,
     int blocks, int strideh, int stridew, int C_in, int C_base,
     list[unknown] blocks_weights, string mode,
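For reference, the input-output relation documented above can be sketched in a few lines of DML (the concrete numbers are illustrative assumptions, not part of the patch). The 3x3 conv uses padding 1, so the spatial dimensions follow standard convolution arithmetic, while the channel count expands by the fixed factor of 4:

  # Illustrative shape arithmetic for bottleneck_block_forward:
  # 3x3 conv with padding 1 gives Hout = (Hin + 2*1 - 3) %/% strideh + 1.
  Hin = 4; Win = 4
  strideh = 2; stridew = 2
  C_base = 64
  Hout = (Hin + 2 - 3) %/% strideh + 1  # = 2
  Wout = (Win + 2 - 3) %/% stridew + 1  # = 2
  C_out = 4 * C_base                    # = 256 channels after expansion
  print("output shape: (N, " + (C_out * Hout * Wout) + ")")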
From 5b893437eae0b0d57a2eaff307460c86ddd77935 Mon Sep 17 00:00:00 2001
From: MaximilianTUB
Date: Fri, 10 Nov 2023 16:50:33 +0100
Subject: [PATCH 02/15] Implementation of Bottleneck block

---
 scripts/nn/networks/resnet.dml | 79 ++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/scripts/nn/networks/resnet.dml b/scripts/nn/networks/resnet.dml
index 48a09694f23..def05893433 100644
--- a/scripts/nn/networks/resnet.dml
+++ b/scripts/nn/networks/resnet.dml
@@ -238,6 +238,85 @@ bottleneck_block_forward = function(matrix[double] X,
  * - ema_means_vars_upd: List of updated exponential moving averages
  *     for mean and variance of batch normalization layers.
  */
+  downsample = strideh > 1 | stridew > 1 | C_in != 4 * C_base
+  # default values
+  mu_bn = 0.1
+  epsilon_bn = 1e-05
+
+  # get all params
+  W_conv1 = as.matrix(weights[1])
+  gamma_bn1 = as.matrix(weights[2])
+  beta_bn1 = as.matrix(weights[3])
+  W_conv2 = as.matrix(weights[4])
+  gamma_bn2 = as.matrix(weights[5])
+  beta_bn2 = as.matrix(weights[6])
+  W_conv3 = as.matrix(weights[7])
+  gamma_bn3 = as.matrix(weights[8])
+  beta_bn3 = as.matrix(weights[9])
+
+  ema_mean_bn1 = as.matrix(ema_means_vars[1])
+  ema_var_bn1 = as.matrix(ema_means_vars[2])
+  ema_mean_bn2 = as.matrix(ema_means_vars[3])
+  ema_var_bn2 = as.matrix(ema_means_vars[4])
+  ema_mean_bn3 = as.matrix(ema_means_vars[5])
+  ema_var_bn3 = as.matrix(ema_means_vars[6])
+
+  if (downsample) {
+    # gather params for downsampling
+    W_conv4 = as.matrix(weights[10])
+    gamma_bn4 = as.matrix(weights[11])
+    beta_bn4 = as.matrix(weights[12])
+    ema_mean_bn4 = as.matrix(ema_means_vars[7])
+    ema_var_bn4 = as.matrix(ema_means_vars[8])
+  }
+
+  # RESIDUAL PATH
+  # First convolutional layer
+  [out, Hout, Wout] = conv1x1_forward(X, W_conv1, C_in, C_base, Hin, Win,
+                                      1, 1)
+  [out, ema_mean_bn1_upd, ema_var_bn1_upd, c_m, c_v] = bn2d::forward(out, gamma_bn1,
+                                      beta_bn1, C_base, Hout, Wout,
+                                      mode, ema_mean_bn1, ema_var_bn1,
+                                      mu_bn, epsilon_bn)
+  out = relu::forward(out)
+
+  # Second convolutional layer
+  [out, Hout, Wout] = conv3x3_forward(out, W_conv2, C_base, C_base, Hout,
+                                      Wout, strideh, stridew)
+  [out, ema_mean_bn2_upd, ema_var_bn2_upd, c_m, c_v] = bn2d::forward(out, gamma_bn2,
+                                      beta_bn2, C_base, Hout, Wout,
+                                      mode, ema_mean_bn2, ema_var_bn2,
+                                      mu_bn, epsilon_bn)
+  out = relu::forward(out)
+
+  # Third convolutional layer
+  [out, Hout, Wout] = conv1x1_forward(out, W_conv3, C_base, 4*C_base, Hout,
+                                      Wout, 1, 1)
+  [out, ema_mean_bn3_upd, ema_var_bn3_upd, c_m, c_v] = bn2d::forward(out, gamma_bn3,
+                                      beta_bn3, 4*C_base, Hout, Wout,
+                                      mode, ema_mean_bn3, ema_var_bn3,
+                                      mu_bn, epsilon_bn)
+
+  # IDENTITY PATH
+  identity = X
+  if (downsample) {
+    # downsample input
+    [identity, Hout, Wout] = conv1x1_forward(X, W_conv4, C_in, 4*C_base,
+                                      Hin, Win, strideh, stridew)
+    [identity, ema_mean_bn4_upd, ema_var_bn4_upd, c_m, c_v] = bn2d::forward(identity,
+                                      gamma_bn4, beta_bn4, 4*C_base, Hout, Wout,
+                                      mode, ema_mean_bn4, ema_var_bn4, mu_bn,
+                                      epsilon_bn)
+  }
+
+  out = relu::forward(out + identity)
+
+  ema_means_vars_upd = list(ema_mean_bn1_upd, ema_var_bn1_upd, ema_mean_bn2_upd,
+                            ema_var_bn2_upd, ema_mean_bn3_upd, ema_var_bn3_upd)
+  if (downsample) {
+    ema_means_vars_upd = append(ema_means_vars_upd, ema_mean_bn4_upd)
+    ema_means_vars_upd = append(ema_means_vars_upd, ema_var_bn4_upd)
+  }
 }
 
 basic_reslayer_forward = function(matrix[double] X, int Hin, int Win, int blocks,
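With the implementation in place, the block can be exercised end to end. Below is a minimal usage sketch (illustrative only, not part of the patch): the dimensions and random weights are assumptions, mirroring the shapes required by the docstring and the component tests added later in this series.

  source("scripts/nn/networks/resnet.dml") as resnet

  N = 2; C_in = 2; C_base = 2; Hin = 4; Win = 4
  X = rand(rows=N, cols=C_in*Hin*Win)
  # conv weights in the documented shapes; batch norm scale=1, shift=0
  W1 = rand(rows=C_base, cols=C_in*1*1)
  W2 = rand(rows=C_base, cols=C_base*3*3)
  W3 = rand(rows=4*C_base, cols=C_base*1*1)
  W4 = rand(rows=4*C_base, cols=C_in*1*1)  # downsample conv
  g = matrix(1, rows=C_base, cols=1);    b = matrix(0, rows=C_base, cols=1)
  g4 = matrix(1, rows=4*C_base, cols=1); b4 = matrix(0, rows=4*C_base, cols=1)
  weights = list(W1, g, b, W2, g, b, W3, g4, b4, W4, g4, b4)
  # one zero-initialized EMA mean/variance pair per batch norm layer
  z = matrix(0, rows=C_base, cols=1); z4 = matrix(0, rows=4*C_base, cols=1)
  emas = list(z, z, z, z, z4, z4, z4, z4)
  [out, Hout, Wout, emas_upd] = resnet::bottleneck_block_forward(X, weights,
      C_in, C_base, Hin, Win, 2, 2, "train", emas)
  print("out: " + nrow(out) + " x " + ncol(out))  # 2 x 32, i.e. (N, 4*C_base*Hout*Wout)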
From 6f6bf5fd502c9d407f8980556f6e28e90fcd9012 Mon Sep 17 00:00:00 2001
From: MaximilianTUB
Date: Fri, 10 Nov 2023 18:05:09 +0100
Subject: [PATCH 03/15] Renamed component test for basic block functionality

---
 .../org/apache/sysds/test/applications/nn/NNComponentTest.java | 2 +-
 .../applications/nn/component/{resnet.dml => resnet_basic.dml} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename src/test/scripts/applications/nn/component/{resnet.dml => resnet_basic.dml} (100%)

diff --git a/src/test/java/org/apache/sysds/test/applications/nn/NNComponentTest.java b/src/test/java/org/apache/sysds/test/applications/nn/NNComponentTest.java
index a41fc235f52..b3eb7536ce2 100644
--- a/src/test/java/org/apache/sysds/test/applications/nn/NNComponentTest.java
+++ b/src/test/java/org/apache/sysds/test/applications/nn/NNComponentTest.java
@@ -115,7 +115,7 @@ public void logcosh(){
 
 	@Test
 	public void resnet() {
-		run("resnet.dml");
+		run("resnet_basic.dml");
 	}
 
 	@Override
diff --git a/src/test/scripts/applications/nn/component/resnet.dml b/src/test/scripts/applications/nn/component/resnet_basic.dml
similarity index 100%
rename from src/test/scripts/applications/nn/component/resnet.dml
rename to src/test/scripts/applications/nn/component/resnet_basic.dml

From 8f61fd1f8e2edaec07f4b3a73ba54c7dd3420695 Mon Sep 17 00:00:00 2001
From: MaximilianTUB
Date: Fri, 10 Nov 2023 18:06:37 +0100
Subject: [PATCH 04/15] Created new component test for bottleneck

---
 .../org/apache/sysds/test/applications/nn/NNComponentTest.java   | 1 +
 src/test/scripts/applications/nn/component/resnet_bottleneck.dml | 0
 2 files changed, 1 insertion(+)
 create mode 100644 src/test/scripts/applications/nn/component/resnet_bottleneck.dml

diff --git a/src/test/java/org/apache/sysds/test/applications/nn/NNComponentTest.java b/src/test/java/org/apache/sysds/test/applications/nn/NNComponentTest.java
index b3eb7536ce2..86b2f64bb7a 100644
--- a/src/test/java/org/apache/sysds/test/applications/nn/NNComponentTest.java
+++ b/src/test/java/org/apache/sysds/test/applications/nn/NNComponentTest.java
@@ -116,6 +116,7 @@ public void logcosh(){
 	@Test
 	public void resnet() {
 		run("resnet_basic.dml");
+		run("resnet_bottleneck.dml");
 	}
 
 	@Override
diff --git a/src/test/scripts/applications/nn/component/resnet_bottleneck.dml b/src/test/scripts/applications/nn/component/resnet_bottleneck.dml
new file mode 100644
index 00000000000..e69de29bb2d

From cfd68d56e4a5cbd4e333f741c42ba7929cd8fbcf Mon Sep 17 00:00:00 2001
From: MaximilianTUB
Date: Fri, 10 Nov 2023 19:40:49 +0100
Subject: [PATCH 05/15] Incomplete component test

---
 .../nn/component/resnet_bottleneck.dml | 176 ++++++++++++++++++
 1 file changed, 176 insertions(+)

diff --git a/src/test/scripts/applications/nn/component/resnet_bottleneck.dml b/src/test/scripts/applications/nn/component/resnet_bottleneck.dml
index e69de29bb2d..ae9c75fff7b 100644
--- a/src/test/scripts/applications/nn/component/resnet_bottleneck.dml
+++ b/src/test/scripts/applications/nn/component/resnet_bottleneck.dml
@@ -0,0 +1,176 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("scripts/nn/networks/resnet.dml") as resnet
+source("src/test/scripts/applications/nn/util.dml") as test_util
+
+values_test_bottleneck_forward_1 = function() {
+  /*
+   * Testing of values for the forward pass of the bottleneck block against PyTorch.
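+   * The randomly initialized weights below are hard-coded from a
+   * run of PyTorch's torchvision.models.resnet.Bottleneck module,
+   * which serves as the reference implementation for this test.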
+ */ + strideh = 1; stridew = 1 + C_in = 2; C_base = 2 + Hin = 2; Win = 2 + N = 3 + X = matrix(1, rows=N, cols=C_in*Hin*Win) + W_conv1 = matrix(" 0.36123914 + + 0.2112847 + + + -0.37620437 + + 0.34918302 ", + rows=C_base, cols=C_in*1*1) + gamma_bn1 = matrix(1, rows=C_base, cols=1) + beta_bn1 = matrix(0, rows=C_base, cols=1) + W_conv2 = matrix(" 0.05199759 -0.06049253 -0.04309021 + -0.21535346 0.12896903 0.15667327 + -0.09216988 0.06201397 0.13056289 + + -0.08724584 0.10121109 -0.03631943 + -0.01245737 0.02210985 -0.06935176 + -0.08781703 -0.09042393 -0.01406386 + + + -0.12051006 -0.16107613 -0.21704304 + 0.01695921 -0.03490813 0.01341523 + -0.01890042 -0.09000328 -0.16045968 + + 0.20279111 -0.08519125 -0.03917991 + -0.08972444 0.1956595 -0.10737693 + -0.05993287 0.06089772 -0.06617799 ", + rows=C_base, cols=C_base*3*3) + gamma_bn2 = matrix(1, rows=C_base, cols=1) + beta_bn2 = matrix(0, rows=C_base, cols=1) + W_conv3 = matrix(" -6.3223720e-02 + + -6.7343938e-01 + + + -2.9717910e-01 + + 5.5539685e-01 + + + -6.3470721e-02 + + 2.8451627e-01 + + + -2.3001271e-01 + + 7.0012289e-01 + + + -5.6478333e-01 + + -1.3797164e-01 + + + 7.7093422e-02 + + 3.5912853e-01 + + + -5.4846668e-01 + + 1.1951989e-01 + + + -6.7457885e-01 + + 4.0823221e-04", + rows=4*C_base, cols=C_base*1*1) + gamma_bn3 = matrix(1, rows=4*C_base, cols=1) + beta_bn3 = matrix(0, rows=4*C_base, cols=1) + # downsample weights + W_conv4 = matrix(" 0.27093774 + + 0.20230454 + + + -0.14230269 + + -0.12077826 + + + 0.67874914 + + 0.08611172 + + + 0.6857756 + + -0.17132008 + + + -0.16158098 + + -0.6565306 + + + 0.59828144 + + -0.14310724 + + + -0.20270991 + + 0.20009744 + + + 0.24481595 + + -0.70355356", + rows=4*C_base, cols=C_base*1*1) + gamma_bn4 = matrix(1, rows=4*C_base, cols=1) + beta_bn4 = matrix(0, rows=4*C_base, cols=1) + + weights = list(W_conv1, gamma_bn1, beta_bn1, W_conv2, gamma_bn2, beta_bn2, + W_conv3, gamma_bn3, beta_bn3, W_conv4, gamma_bn4, beta_bn4) + mode = "train" + + ema_mean_bn1 = matrix(0, rows=C_base, cols=1) + ema_var_bn1 = matrix(0, rows=C_base, cols=1) + ema_mean_bn2 = matrix(0, rows=C_base, cols=1) + ema_var_bn2 = matrix(0, rows=C_base, cols=1) + ema_mean_bn3 = matrix(0, rows=4*C_base, cols=1) + ema_var_bn3 = matrix(0, rows=4*C_base, cols=1) + ema_mean_bn4 = matrix(0, rows=4*C_base, cols=1) + ema_var_bn4 = matrix(0, rows=4*C_base, cols=1) + + ema_means_vars = list(ema_mean_bn1, ema_var_bn1, ema_mean_bn2, ema_var_bn2, + ema_mean_bn3, ema_var_bn3, ema_mean_bn4, ema_var_bn4) + + [out, Hout, Wout, ema_means_vars_up] = resnet::bottleneck_block_forward(X, weights, C_in, C_base, Hin, Win, + strideh, stridew, mode, ema_means_vars) + + Hout_exp = 2 + Wout_exp = 2 + #out_expected = matrix("", + # rows=N, cols=Hout_exp*Wout_exp*4*C_base) + + #test_util::check_all_close(out, out_expected, 0.00001) + print(toString(out)) +} + +values_test_bottleneck_forward_1() From cba18a30777b3809333593b77b26a69423b96933 Mon Sep 17 00:00:00 2001 From: MaximilianTUB Date: Thu, 16 Nov 2023 12:24:23 +0100 Subject: [PATCH 06/15] New component test for bottleneck block --- .../nn/component/resnet_bottleneck.dml | 177 ++++++++++++------ 1 file changed, 124 insertions(+), 53 deletions(-) diff --git a/src/test/scripts/applications/nn/component/resnet_bottleneck.dml b/src/test/scripts/applications/nn/component/resnet_bottleneck.dml index ae9c75fff7b..f30046e9b58 100644 --- a/src/test/scripts/applications/nn/component/resnet_bottleneck.dml +++ b/src/test/scripts/applications/nn/component/resnet_bottleneck.dml @@ -30,117 +30,117 @@ 
values_test_bottleneck_forward_1 = function() { C_in = 2; C_base = 2 Hin = 2; Win = 2 N = 3 - X = matrix(1, rows=N, cols=C_in*Hin*Win) - W_conv1 = matrix(" 0.36123914 + X = matrix(seq(1, N*C_in*Hin*Win), rows=N, cols=C_in*Hin*Win) + W_conv1 = matrix(" 0.5185197 - 0.2112847 + -0.01638347 - -0.37620437 + -0.41337225 - 0.34918302 ", + -0.27387595", rows=C_base, cols=C_in*1*1) gamma_bn1 = matrix(1, rows=C_base, cols=1) beta_bn1 = matrix(0, rows=C_base, cols=1) - W_conv2 = matrix(" 0.05199759 -0.06049253 -0.04309021 - -0.21535346 0.12896903 0.15667327 - -0.09216988 0.06201397 0.13056289 + W_conv2 = matrix(" -0.20811445 0.15620266 -0.02045412 + 0.16239561 0.08874698 -0.04675962 + -0.15071772 0.08203228 -0.15123627 - -0.08724584 0.10121109 -0.03631943 - -0.01245737 0.02210985 -0.06935176 - -0.08781703 -0.09042393 -0.01406386 + -0.05238193 0.14012058 -0.12834892 + -0.01153079 -0.03199132 0.02700911 + 0.22693978 0.1377839 -0.08849475 - -0.12051006 -0.16107613 -0.21704304 - 0.01695921 -0.03490813 0.01341523 - -0.01890042 -0.09000328 -0.16045968 + 0.20858495 -0.07665969 0.09709726 + 0.09753416 -0.00713645 -0.21934068 + 0.2215875 -0.05444418 0.16051485 - 0.20279111 -0.08519125 -0.03917991 - -0.08972444 0.1956595 -0.10737693 - -0.05993287 0.06089772 -0.06617799 ", + -0.14136882 -0.02405146 0.20206784 + -0.20452432 0.00909661 -0.00307493 + -0.20145056 -0.0456574 -0.21605067", rows=C_base, cols=C_base*3*3) gamma_bn2 = matrix(1, rows=C_base, cols=1) beta_bn2 = matrix(0, rows=C_base, cols=1) - W_conv3 = matrix(" -6.3223720e-02 + W_conv3 = matrix(" 0.1527785 - -6.7343938e-01 + 0.11908448 - -2.9717910e-01 + 0.40458113 - 5.5539685e-01 + 0.01768601 - -6.3470721e-02 + -0.2847237 - 2.8451627e-01 + -0.25494343 - -2.3001271e-01 + -0.26399058 - 7.0012289e-01 + 0.5500943 - -5.6478333e-01 + 0.22660124 - -1.3797164e-01 + -0.7040346 - 7.7093422e-02 + 0.12641346 - 3.5912853e-01 + -0.09441459 - -5.4846668e-01 + 0.5158523 - 1.1951989e-01 + -0.20682847 - -6.7457885e-01 + 0.61719567 - 4.0823221e-04", + 0.21850073", rows=4*C_base, cols=C_base*1*1) gamma_bn3 = matrix(1, rows=4*C_base, cols=1) beta_bn3 = matrix(0, rows=4*C_base, cols=1) # downsample weights - W_conv4 = matrix(" 0.27093774 + W_conv4 = matrix(" -0.3727211 - 0.20230454 + -0.38662055 - -0.14230269 + 0.42501384 - -0.12077826 + -0.4678393 - 0.67874914 + -0.33239904 - 0.08611172 + 0.38465446 - 0.6857756 + -0.5258211 - -0.17132008 + 0.346785 - -0.16158098 + 0.43058223 - -0.6565306 + 0.19192165 - 0.59828144 + 0.12673676 - -0.14310724 + 0.27335274 - -0.20270991 + 0.5349248 - 0.20009744 + 0.05752403 - 0.24481595 + -0.5090851 - -0.70355356", + 0.6524388", rows=4*C_base, cols=C_base*1*1) gamma_bn4 = matrix(1, rows=4*C_base, cols=1) beta_bn4 = matrix(0, rows=4*C_base, cols=1) @@ -166,11 +166,82 @@ values_test_bottleneck_forward_1 = function() { Hout_exp = 2 Wout_exp = 2 - #out_expected = matrix("", - # rows=N, cols=Hout_exp*Wout_exp*4*C_base) + out_expected = matrix(" 0.6000617 2.6354597 + 1.3651271 0.14736581 - #test_util::check_all_close(out, out_expected, 0.00001) - print(toString(out)) + 0.79244256 3.4513025 + 0.5675593 0.33977485 + + 0. 0. + 0. 0. + + 1.0696087 0. + 2.9806733 0.6169146 + + 0. 0.42738855 + 0. 0. + + 0. 1.3223773 + 0. 0. + + 0. 1.2508607 + 0. 0. + + 0. 0.5346739 + 0. 0. + + + 0. 0. + 0. 0. + + 0. 0. + 0. 0. + + 0.19081071 0.62350845 + 0.7712998 0.87608457 + + 0.6735816 0. + 0. 0. + + 0. 0.14371195 + 0.28843445 0.3475034 + + 0. 0. + 0. 0. + + 0. 0. + 0. 0. + + 0. 0. + 0. 0. + + + 0. 0.14938474 + 0. 0.75312185 + + 0. 0. + 0. 0.5097313 + + 1.8268178 0. 
+ 1.3909308 0. + + 0. 0.50544345 + 0. 0. + + 1.492898 0. + 2.218107 0.67900985 + + 0.80380654 0.4615401 + 2.0888937 2.4350812 + + 0.58336127 0.9749389 + 1.9210864 2.9139411 + + 0.22877222 2.045148 + 1.4386882 3.5448616", + rows=N, cols=Hout_exp*Wout_exp*4*C_base) + + test_util::check_all_close(out, out_expected, 0.00001) } values_test_bottleneck_forward_1() From b6427190bdd48c1f8bcf8aa352b0f269f7ecc569 Mon Sep 17 00:00:00 2001 From: MaximilianTUB Date: Thu, 16 Nov 2023 16:38:11 +0100 Subject: [PATCH 07/15] New bottleneck block test case --- .../nn/component/resnet_bottleneck.dml | 502 +++++++++++++++++- 1 file changed, 501 insertions(+), 1 deletion(-) diff --git a/src/test/scripts/applications/nn/component/resnet_bottleneck.dml b/src/test/scripts/applications/nn/component/resnet_bottleneck.dml index f30046e9b58..9496dcf4616 100644 --- a/src/test/scripts/applications/nn/component/resnet_bottleneck.dml +++ b/src/test/scripts/applications/nn/component/resnet_bottleneck.dml @@ -141,7 +141,7 @@ values_test_bottleneck_forward_1 = function() { -0.5090851 0.6524388", - rows=4*C_base, cols=C_base*1*1) + rows=4*C_base, cols=C_in*1*1) gamma_bn4 = matrix(1, rows=4*C_base, cols=1) beta_bn4 = matrix(0, rows=4*C_base, cols=1) @@ -244,4 +244,504 @@ values_test_bottleneck_forward_1 = function() { test_util::check_all_close(out, out_expected, 0.00001) } +values_test_bottleneck_forward_2 = function() { + /* + * Testing of values for forward pass of basic block against PyTorch. + */ + strideh = 2; stridew = 2 + C_in = 2; C_base = 4 + Hin = 4; Win = 4 + N = 3 + X = matrix(seq(1, N*C_in*Hin*Win), rows=N, cols=C_in*Hin*Win) + W_conv1 = matrix(" 0.54590577 + + 0.11767608 + + + -0.22960076 + + 0.43695658 + + + 0.11020315 + + 0.5713164 + + + 0.07730067 + + -0.22300252", + rows=C_base, cols=C_in*1*1) + gamma_bn1 = matrix(1, rows=C_base, cols=1) + beta_bn1 = matrix(0, rows=C_base, cols=1) + W_conv2 = matrix(" 0.04478061 -0.04519658 0.07014292 + 0.1488037 0.09634326 -0.07286209 + 0.09621079 0.02982104 0.08463918 + + -0.10158418 -0.16498475 -0.06439342 + -0.12783715 0.1367565 0.04800522 + 0.06903559 0.05271019 -0.00289933 + + 0.1304347 -0.11841893 0.01049395 + -0.1137567 0.051392 -0.05739705 + 0.05106938 -0.03472358 0.13823198 + + -0.09878366 -0.09939967 -0.09940567 + 0.14990713 0.05554186 0.16037513 + -0.13754605 -0.16531269 -0.13039397 + + + -0.11211485 0.06750669 0.05967931 + 0.13848741 -0.0860709 -0.11361863 + 0.0884297 -0.06736742 0.10115398 + + -0.03955011 0.09534098 -0.12949467 + -0.08410829 0.05081274 0.03523459 + -0.04249313 0.09934492 0.11330153 + + -0.12086223 -0.08897804 0.15261032 + -0.05623876 -0.05908607 -0.16126578 + -0.09544504 0.04163395 -0.02199887 + + -0.120981 0.00390945 -0.11384692 + -0.14139944 -0.09177711 -0.14586869 + -0.10612302 0.1666015 0.03147916 + + + 0.05135995 -0.1554474 -0.10946231 + -0.05547597 0.02606185 -0.14665356 + -0.07181218 -0.0997781 0.00046188 + + -0.0620172 -0.01154929 -0.11293828 + -0.11439919 -0.09723364 -0.05704957 + -0.13154683 0.13974498 -0.03307734 + + 0.14339946 0.05193035 -0.14113283 + 0.1153392 -0.04585747 -0.06388768 + -0.13834509 -0.16569345 0.0476851 + + -0.03640731 0.06488718 -0.13677725 + 0.12373818 -0.12234229 -0.0287789 + 0.03481162 0.08604197 0.13455172 + + + 0.15182655 -0.13215369 0.04194455 + -0.07168766 -0.01826413 -0.12474835 + 0.15180977 -0.12232509 0.08907522 + + 0.05857326 0.05415933 -0.0901077 + 0.15149193 0.03662507 0.02144001 + -0.1468758 0.06996475 -0.02500343 + + -0.07635405 0.14315777 0.03715813 + -0.09221274 -0.08435649 -0.00795929 + 0.09306021 
-0.04258897 -0.09509581 + + -0.05707382 -0.12451248 0.05943875 + 0.1290067 -0.15690672 0.03870845 + 0.08609863 0.03022157 -0.05935411", + rows=C_base, cols=C_base*3*3) + gamma_bn2 = matrix(1, rows=C_base, cols=1) + beta_bn2 = matrix(0, rows=C_base, cols=1) + W_conv3 = matrix(" 0.26097107 + + 0.2627566 + + 0.18696362 + + -0.0878607 + + + -0.13240063 + + 0.05349046 + + -0.08832705 + + -0.14900053 + + + 0.31960344 + + 0.42969978 + + -0.04949868 + + -0.11194843 + + + 0.00729614 + + -0.02985412 + + 0.12020564 + + 0.14011681 + + + -0.45412838 + + -0.18451887 + + 0.42106473 + + 0.19477749 + + + -0.02486879 + + -0.30145288 + + -0.30590254 + + -0.44788343 + + + -0.16298121 + + 0.16885209 + + 0.31881082 + + 0.23084867 + + + -0.44197202 + + -0.30068123 + + -0.07890832 + + 0.48367476 + + + 0.07232875 + + -0.12948537 + + 0.20685762 + + -0.19044077 + + + -0.32362783 + + 0.36494362 + + -0.22735089 + + -0.10023338 + + + -0.49740213 + + 0.33463532 + + 0.37881732 + + 0.1822241 + + + -0.3486371 + + -0.49346995 + + -0.40608948 + + 0.37285012 + + + 0.24005288 + + 0.42075223 + + 0.26193494 + + 0.12654608 + + + -0.00489634 + + -0.38025302 + + -0.4283861 + + -0.46767431 + + + 0.20468098 + + -0.245484 + + -0.10062629 + + -0.28775263 + + + -0.09111178 + + -0.35191745 + + -0.32670784 + + 0.16585541", + rows=4*C_base, cols=C_base*1*1) + gamma_bn3 = matrix(1, rows=4*C_base, cols=1) + beta_bn3 = matrix(0, rows=4*C_base, cols=1) + # downsample weights + W_conv4 = matrix(" 0.5406104 + + 0.5869042 + + + -0.16565567 + + 0.6495562 + + + -0.15492964 + + 0.14268756 + + + -0.3442585 + + 0.41527158 + + + 0.62334496 + + -0.5187534 + + + 0.61461455 + + 0.13234162 + + + 0.5224168 + + 0.09576386 + + + 0.34095842 + + -0.09983712 + + + 0.5450986 + + 0.10451669 + + + -0.33010566 + + 0.18024033 + + + -0.32579 + + -0.08292443 + + + -0.2871973 + + 0.46907407 + + + -0.558169 + + -0.32598352 + + + -0.19966906 + + -0.42516384 + + + 0.06673896 + + -0.6983946 + + + 0.63859457 + + -0.60066473", + rows=4*C_base, cols=C_in*1*1) + gamma_bn4 = matrix(1, rows=4*C_base, cols=1) + beta_bn4 = matrix(0, rows=4*C_base, cols=1) + + weights = list(W_conv1, gamma_bn1, beta_bn1, W_conv2, gamma_bn2, beta_bn2, + W_conv3, gamma_bn3, beta_bn3, W_conv4, gamma_bn4, beta_bn4) + mode = "train" + + ema_mean_bn1 = matrix(0, rows=C_base, cols=1) + ema_var_bn1 = matrix(0, rows=C_base, cols=1) + ema_mean_bn2 = matrix(0, rows=C_base, cols=1) + ema_var_bn2 = matrix(0, rows=C_base, cols=1) + ema_mean_bn3 = matrix(0, rows=4*C_base, cols=1) + ema_var_bn3 = matrix(0, rows=4*C_base, cols=1) + ema_mean_bn4 = matrix(0, rows=4*C_base, cols=1) + ema_var_bn4 = matrix(0, rows=4*C_base, cols=1) + + ema_means_vars = list(ema_mean_bn1, ema_var_bn1, ema_mean_bn2, ema_var_bn2, + ema_mean_bn3, ema_var_bn3, ema_mean_bn4, ema_var_bn4) + + [out, Hout, Wout, ema_means_vars_up] = resnet::bottleneck_block_forward(X, weights, C_in, C_base, Hin, Win, + strideh, stridew, mode, ema_means_vars) + + Hout_exp = 2 + Wout_exp = 2 + out_expected = matrix(" 0. 0. + 0. 0. + + 0. 0. + 0. 0. + + 0.9109738 0. + 0.26423687 0.02831399 + + 0. 1.1842504 + 0. 0. + + 0. 0.27886808 + 0. 0. + + 0. 0. + 0. 0. + + 0. 0.4690894 + 0. 0. + + 0. 0.5754694 + 0. 0. + + 0. 0. + 0.15799654 0.06399322 + + 1.6746848 0.10162199 + 1.1494169 0.7138793 + + 1.7823488 2.4663901 + 1.1404462 1.3706592 + + 0. 0.5186337 + 0. 0. + + 0.46825778 1.802414 + 0. 0. + + 1.8138912 0. + 2.061407 1.230821 + + 1.5826645 0. + 1.597543 1.0766873 + + 0. 0.05367124 + 0. 0. + + + 0. 0. + 0.14620686 0. + + 0. 0. + 0.5086571 0.7567121 + + 0. 0. + 0. 0. 
+ + 0.22735572 0.93278456 + 0.3514644 0.19731534 + + 0.36241722 0.7131092 + 0.4989733 0.43317872 + + 0. 0. + 0. 0. + + 0.46766508 0.79504216 + 0.88795483 0.6664516 + + 0.37459964 0.99809396 + 0.4362625 0.5768033 + + 0. 0. + 0. 0. + + 0.67712426 0.07316476 + 0.9103337 0.88836694 + + 0.8823242 0.8489517 + 0.75503683 0.45444334 + + 0.23441726 1.0932865 + 0.0098927 0.47565216 + + 0.2859242 0.03880775 + 0.80046034 0.08464086 + + 0. 0. + 0. 0. + + 0. 0. + 0. 0. + + 0. 0.8421172 + 0. 0.02687168 + + + 2.295794 2.8480763 + 2.8238502 2.022955 + + 1.1745573 0.23529911 + 2.7716846 0.32033563 + + 0.23475814 0.3514259 + 0.3389814 0. + + 0. 0.14817369 + 0. 0.9291567 + + 0. 0. + 0.31184435 0.17355764 + + 1.9307995 2.276723 + 1.5024273 2.3755558 + + 0. 0. + 0.94689417 0. + + 0. 0. + 0.16562259 0.579775 + + 1.3288593 2.229822 + 0.2227614 2.0304394 + + 0. 0. + 0.5008893 0. + + 0. 0. + 0. 0. + + 0. 0. + 0. 1.2533097 + + 0. 0. + 0.40518343 0. + + 0. 0.19982207 + 0. 0. + + 0.03520691 0.69359136 + 0. 0.04526865 + + 0.48097372 1.0040715 + 0. 2.744193 ", + rows=N, cols=Hout_exp*Wout_exp*4*C_base) + + test_util::check_all_close(out, out_expected, 0.00001) +} + values_test_bottleneck_forward_1() + +values_test_bottleneck_forward_2() From 5705573f19a7ab0527701ee85699668cb52ef8bc Mon Sep 17 00:00:00 2001 From: MaximilianTUB Date: Thu, 16 Nov 2023 16:48:55 +0100 Subject: [PATCH 08/15] Added documentation to component test --- .../nn/component/resnet_bottleneck.dml | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/test/scripts/applications/nn/component/resnet_bottleneck.dml b/src/test/scripts/applications/nn/component/resnet_bottleneck.dml index 9496dcf4616..0eb884d113c 100644 --- a/src/test/scripts/applications/nn/component/resnet_bottleneck.dml +++ b/src/test/scripts/applications/nn/component/resnet_bottleneck.dml @@ -742,6 +742,34 @@ values_test_bottleneck_forward_2 = function() { test_util::check_all_close(out, out_expected, 0.00001) } + +/* + * **** Bottleneck Block Value Testing **** + * In these test cases, we compare the forward pass + * computation of a basic residual block against the + * PyTorch implementation. We calculate the PyTorch + * values with the NN module + * torchvision.models.resnet.Bottleneck and then + * hard-code the randomly initialized weights and + * biases and the expected output computed by PyTorch + * into this file. + */ + +/* + * Test case 1: + * A forward pass of the bottleneck block with same + * input and output dimensions, i.e. stride 1 but + * with the default block expansion, i.e. different + * number of input and output channels thus requiring + * downsampling, in train mode. + */ values_test_bottleneck_forward_1() +/* + * Test case 2: + * A forward pass of the bottleneck block with different + * input and output dimensions with stride of 2 and + * with the default block expansion, i.e. downsampling, + * in train mode. 
+ */ values_test_bottleneck_forward_2() From 011df23033035405ce0d0d0d1350c7a7d181619d Mon Sep 17 00:00:00 2001 From: MaximilianTUB Date: Thu, 16 Nov 2023 17:45:20 +0100 Subject: [PATCH 09/15] Forward pass for whole residual layer of bottleneck blocks + 1 component test --- scripts/nn/networks/resnet.dml | 58 ++ .../nn/component/resnet_bottleneck.dml | 840 ++++++++++++++++++ 2 files changed, 898 insertions(+) diff --git a/scripts/nn/networks/resnet.dml b/scripts/nn/networks/resnet.dml index def05893433..7786482df37 100644 --- a/scripts/nn/networks/resnet.dml +++ b/scripts/nn/networks/resnet.dml @@ -373,6 +373,64 @@ basic_reslayer_forward = function(matrix[double] X, int Hin, int Win, int blocks } } +bottleneck_reslayer_forward = function(matrix[double] X, int Hin, int Win, int blocks, + int strideh, int stridew, int C_in, int C_base, + list[unknown] blocks_weights, string mode, + list[unknown] ema_means_vars) + return (matrix[double] out, int Hout, int Wout, + list[unknown] ema_means_vars_upd) { + /* + * Executes the forward pass for a sequence of bottleneck + * residual blocks with the same number of base channels, + * i.e. residual layer. + * + * Inputs: + * - X: Inputs, of shape (N, C_in*Hin*Win) + * - Hin: Input height. + * - Win: Input width. + * - blocks: Number of residual blocks (bigger than 0). + * - strideh: Stride height for first conv layer of first block. + * - stridew: Stride width for first conv layer of first block. + * - C_in: Number of input channels. + * - C_base: Number of base channels of res layer. + * - blocks_weights: List of weights of each block. + * -> i: List of weights of block i with the content + * defined in the docs of basic_block_forward(). + * -> length == blocks + * - mode: 'train' or 'test' to indicate if the model is currently + * being trained or tested for badge normalization layers. + * See badge_norm2d.dml docs for more info. + * - ema_means_vars: List of exponential moving averages for mean + * and variance for badge normalization layers of each block. + * -> i: List of EMAs of block i with the content defined + * in the docs of basic_block_forward(). 
+ * -> length == blocks + */ + # default values + mu_bn = 0.1 + epsilon_bn = 1e-05 + + # first block with provided stride + [out, Hout, Wout, emas1_upd] = bottleneck_block_forward(X, as.list(blocks_weights[1]), + C_in, C_base, Hin, Win, strideh, stridew, + mode, as.list(ema_means_vars[1])) + ema_means_vars_upd = list(emas1_upd) + + # other blocks + + # account for block expansion + C_in = 4*C_base + for (i in 2:blocks) { + current_weights = as.list(blocks_weights[i]) + current_emas = as.list(ema_means_vars[i]) + [out, Hout, Wout, current_emas_upd] = bottleneck_block_forward(X=out, + weights=current_weights, C_in=C_in, C_base=C_base, + Hin=Hout, Win=Wout, strideh=1, stridew=1, mode=mode, + ema_means_vars=current_emas) + ema_means_vars_upd = append(ema_means_vars_upd, current_emas_upd) + } +} + resnet_basic_forward = function(matrix[double] X, int Hin, int Win, list[unknown] layer_sizes, list[unknown] model, string mode, diff --git a/src/test/scripts/applications/nn/component/resnet_bottleneck.dml b/src/test/scripts/applications/nn/component/resnet_bottleneck.dml index 0eb884d113c..e69502ddf3c 100644 --- a/src/test/scripts/applications/nn/component/resnet_bottleneck.dml +++ b/src/test/scripts/applications/nn/component/resnet_bottleneck.dml @@ -742,6 +742,827 @@ values_test_bottleneck_forward_2 = function() { test_util::check_all_close(out, out_expected, 0.00001) } +values_test_residual_layer_forward = function() { + Hin = 3; Win = 3; + C_in = 2; C_base = 4; + N = 2 + blocks = 2 + strideh = 2; stridew = 2 + mode = "train" + + X = matrix(seq(1, N*Hin*Win*C_in), rows=N, cols=C_in*Hin*Win) + + # weights for block 1 + W_conv1 = matrix(" 0.64133304 + + -0.30935785 + + + -0.26633477 + + 0.70287377 + + + -0.01898092 + + 0.4187042 + + + 0.26043642 + + 0.65828437", + rows=C_base, cols=C_in*1*1) + gamma_bn1 = matrix(1, rows=C_base, cols=1) + beta_bn1 = matrix(0, rows=C_base, cols=1) + W_conv2 = matrix(" -0.07935387 -0.13985893 0.11053868 + -0.1480007 0.1574573 0.15253572 + 0.15747602 0.16496314 0.034954 + + -0.0016536 -0.0961354 0.02648038 + 0.11480387 -0.09928016 -0.13879894 + 0.06693192 0.13438095 -0.07921901 + + -0.03791906 0.11089791 0.07005687 + 0.16293873 -0.0442756 -0.02163965 + 0.0766806 0.03089702 0.05085187 + + 0.0042686 -0.12960267 0.08154561 + 0.01905231 -0.07790075 0.03734766 + 0.00651456 0.11704092 0.10655515 + + + -0.07699027 -0.03226709 -0.07629572 + 0.15273114 0.10044114 -0.07274394 + 0.1516081 0.09493737 0.07546788 + + -0.0726403 -0.15355454 0.11175342 + -0.04025601 -0.03588425 0.12585725 + 0.13138913 0.10351892 0.09438397 + + 0.03881417 -0.12341571 0.02957357 + -0.00408512 0.00771086 -0.14772138 + -0.02420218 0.02257259 -0.05039396 + + -0.15493202 0.03471132 -0.07309394 + 0.14865221 -0.09820288 -0.01550098 + 0.10916193 -0.03161931 -0.08472542 + + + 0.08376767 -0.12832934 -0.00301033 + 0.10904421 0.02656408 -0.0239322 + 0.04957093 -0.07315841 0.05891012 + + -0.06879979 -0.12485262 0.03166164 + 0.07797651 -0.11737081 -0.01409818 + 0.00895227 -0.07557337 0.01686102 + + -0.06918794 0.12680571 0.05430049 + -0.04680433 -0.08028235 -0.0635463 + -0.01600344 0.07835184 -0.05069631 + + -0.07545875 0.07460625 0.03207885 + 0.16395922 -0.096992 -0.07280873 + 0.0738041 0.08279254 0.02707005 + + + 0.05216871 0.09574054 -0.10493654 + -0.13408403 -0.09751024 -0.09929453 + -0.13031684 0.06681591 0.13878734 + + -0.06675949 0.14814068 0.06931506 + -0.1507676 -0.15641318 0.02355962 + -0.09087996 0.10453825 0.06647408 + + 0.14790873 0.03328724 0.02318728 + -0.04853491 0.1355549 -0.09577837 + -0.01273669 
0.14791735 0.04074055 + + 0.14454861 -0.00055562 -0.02485079 + -0.0598551 0.1521839 0.13950332 + -0.03670852 0.03655088 0.11852266", + rows=C_base, cols=C_base*3*3) + gamma_bn2 = matrix(1, rows=C_base, cols=1) + beta_bn2 = matrix(0, rows=C_base, cols=1) + W_conv3 = matrix(" -0.29437304 + + 0.18341273 + + -0.4085917 + + 0.26053882 + + + 0.31638247 + + 0.4066484 + + 0.4634807 + + -0.07265258 + + + 0.24328196 + + -0.29365987 + + 0.47914386 + + -0.48774058 + + + -0.3374917 + + 0.22080815 + + -0.038697 + + -0.28921968 + + + 0.24076295 + + 0.05936813 + + -0.37986314 + + 0.4292264 + + + 0.38059032 + + -0.2816422 + + 0.36293274 + + -0.18708462 + + + -0.3077286 + + -0.3397568 + + 0.36514604 + + -0.49366736 + + + 0.10718703 + + -0.39724267 + + -0.47798198 + + 0.47490656 + + + -0.02514178 + + -0.09093165 + + 0.25952834 + + 0.11998898 + + + 0.36005867 + + -0.21023494 + + -0.3787375 + + -0.0058462 + + + 0.06437212 + + -0.24833494 + + -0.10442317 + + -0.09545577 + + + -0.3191191 + + 0.13568145 + + -0.18130076 + + -0.13757545 + + + 0.13844067 + + -0.32146406 + + -0.49979246 + + -0.13366985 + + + -0.11780888 + + -0.12696624 + + 0.2837879 + + -0.0088647 + + + 0.15086323 + + 0.02197719 + + -0.10307682 + + 0.01073551 + + + 0.21700346 + + 0.27309185 + + -0.11225039 + + 0.21934658", + rows=4*C_base, cols=C_base*1*1) + gamma_bn3 = matrix(1, rows=4*C_base, cols=1) + beta_bn3 = matrix(0, rows=4*C_base, cols=1) + # downsample weights + W_conv4 = matrix(" -0.46313006 + + -0.03330129 + + + -0.2322562 + + 0.6264885 + + + -0.67066795 + + -0.0445317 + + + 0.68385714 + + -0.15812147 + + + 0.67979616 + + 0.1485151 + + + -0.55756646 + + -0.11412668 + + + -0.2764914 + + 0.5107109 + + + -0.02068317 + + 0.09800112 + + + 0.376929 + + 0.5911364 + + + 0.1532327 + + -0.66188455 + + + -0.50742537 + + 0.4692853 + + + -0.40435696 + + -0.1191256 + + + 0.28702432 + + 0.59869534 + + + -0.31104982 + + -0.02253169 + + + 0.2543087 + + 0.27674532 + + + -0.23902792 + + 0.4872386 ", + rows=4*C_base, cols=C_in*1*1) + gamma_bn4 = matrix(1, rows=4*C_base, cols=1) + beta_bn4 = matrix(0, rows=4*C_base, cols=1) + + weights_block1 = list(W_conv1, gamma_bn1, beta_bn1, W_conv2, gamma_bn2, beta_bn2, + W_conv3, gamma_bn3, beta_bn3, W_conv4, gamma_bn4, beta_bn4) + mode = "train" + + ema_mean_bn1 = matrix(0, rows=C_base, cols=1) + ema_var_bn1 = matrix(0, rows=C_base, cols=1) + ema_mean_bn2 = matrix(0, rows=C_base, cols=1) + ema_var_bn2 = matrix(0, rows=C_base, cols=1) + ema_mean_bn3 = matrix(0, rows=4*C_base, cols=1) + ema_var_bn3 = matrix(0, rows=4*C_base, cols=1) + ema_mean_bn4 = matrix(0, rows=4*C_base, cols=1) + ema_var_bn4 = matrix(0, rows=4*C_base, cols=1) + + emas_block1 = list(ema_mean_bn1, ema_var_bn1, ema_mean_bn2, ema_var_bn2, + ema_mean_bn3, ema_var_bn3, ema_mean_bn4, ema_var_bn4) + + # Weights for block 2 + C_in = 4*C_base + W_conv1_2 = matrix(" -0.15873042 + + -0.12685311 + + 0.24818534 + + -0.11151049 + + -0.21107942 + + 0.1189599 + + 0.17549959 + + 0.22624752 + + -0.2047379 + + -0.11143267 + + 0.10603568 + + 0.16207412 + + 0.16275376 + + -0.1806739 + + -0.02120981 + + -0.0203568 + + + 0.04928717 + + 0.11670396 + + 0.23074868 + + 0.21299681 + + 0.00101537 + + 0.01027396 + + 0.02288854 + + -0.16491142 + + -0.1332432 + + -0.1547856 + + -0.06269354 + + -0.11677879 + + 0.0370577 + + 0.20312098 + + 0.04698643 + + -0.09891671 + + + -0.08488309 + + -0.11952144 + + 0.06800526 + + 0.0516184 + + -0.21355331 + + 0.13793284 + + 0.13408375 + + 0.06124464 + + 0.18330386 + + -0.15059847 + + 0.24269676 + + -0.22168538 + + 0.10450107 + + 0.02138337 + + 
-0.04721311 + + 0.14241049 + + + 0.08129939 + + -0.04329139 + + -0.09548986 + + -0.08126539 + + -0.0406346 + + 0.0537217 + + 0.00775704 + + -0.24578935 + + 0.12068957 + + -0.20760396 + + -0.22163606 + + -0.21903417 + + -0.17546788 + + -0.11092317 + + -0.2458466 + + -0.11614364", + rows=C_base, cols=C_in*1*1) + gamma_bn1_2 = matrix(1, rows=C_base, cols=1) + beta_bn1_2 = matrix(0, rows=C_base, cols=1) + W_conv2_2 = matrix(" -6.49780855e-02 -1.53550386e-01 1.27520934e-01 + -1.14963055e-01 3.47854644e-02 -9.77773294e-02 + 1.39712289e-01 3.93924862e-02 -1.08044863e-01 + + -8.77258033e-02 9.06148702e-02 -1.23133540e-01 + -1.61552995e-01 -7.69458413e-02 6.84276223e-04 + 1.16330996e-01 -5.43563366e-02 5.16963005e-02 + + -1.61242843e-01 4.86649424e-02 9.66079682e-02 + 5.86722940e-02 2.77588516e-02 1.50955319e-02 + -1.50006920e-01 3.65351588e-02 -4.17252779e-02 + + 6.55835271e-02 -8.94589424e-02 -8.49471986e-03 + -5.53610176e-02 9.70451981e-02 7.06677288e-02 + 1.65115044e-01 3.72097939e-02 -5.02234101e-02 + + + -8.07390213e-02 -4.82785702e-02 6.31745458e-02 + -7.80546665e-02 -3.55781764e-02 1.22384578e-02 + -5.75916991e-02 1.36988893e-01 9.28030759e-02 + + -1.29722685e-01 -8.44041556e-02 1.04285926e-02 + 1.21865556e-01 -2.04753429e-02 -7.18220100e-02 + -3.05865854e-02 -1.00977302e-01 -1.63163647e-01 + + -2.82911062e-02 1.47659853e-01 3.42025906e-02 + 8.68119150e-02 1.07737646e-01 -2.95802504e-02 + -7.16226920e-02 -1.23587213e-01 1.40657499e-01 + + 6.51463717e-02 -1.29032552e-01 -1.39336169e-01 + -5.93275055e-02 8.56822282e-02 -1.42511040e-01 + -7.40398169e-02 -5.41220903e-02 -4.33833897e-03 + + + -1.18034229e-01 -7.63256997e-02 7.52437115e-03 + 1.24137387e-01 -1.49110124e-01 -7.26501942e-02 + 9.58135277e-02 1.59387156e-01 2.99520344e-02 + + 1.51494995e-01 -1.38253689e-01 -1.04445815e-01 + -1.05174027e-01 1.84758753e-02 9.58175957e-03 + 4.42418605e-02 -3.34148854e-02 6.24543428e-03 + + -1.62895441e-01 2.09720731e-02 -5.38072586e-02 + 1.08301952e-01 -1.19824275e-01 3.94731760e-05 + -1.18747398e-01 -1.63983807e-01 7.29161650e-02 + + 9.75044221e-02 -1.01447850e-02 -1.66090474e-01 + 1.39794812e-01 1.51129767e-01 -1.42522335e-01 + 2.78563499e-02 -1.24729119e-01 -5.18599004e-02 + + + -1.52138442e-01 -5.16706333e-02 6.49023205e-02 + -9.79826078e-02 1.14026591e-01 -5.25996909e-02 + -8.55211243e-02 1.02774397e-01 1.27465829e-01 + + 5.07049710e-02 5.54278344e-02 5.20297140e-02 + -3.19346040e-02 1.56156614e-01 4.14624810e-04 + -1.10439107e-01 -3.79917622e-02 7.33491927e-02 + + 9.80907828e-02 1.14526585e-01 6.30494207e-02 + -6.73903525e-03 -2.50494480e-03 1.09999135e-01 + 1.50500730e-01 1.41846985e-02 6.23831153e-02 + + -2.22751051e-02 -9.66079682e-02 -6.04704842e-02 + 4.52399105e-02 3.01361084e-02 8.97199064e-02 + 4.74042743e-02 -9.87502337e-02 -1.09392643e-01", + rows=C_base, cols=C_base*3*3) + gamma_bn2_2 = matrix(1, rows=C_base, cols=1) + beta_bn2_2 = matrix(0, rows=C_base, cols=1) + W_conv3_2 = matrix(" 0.06731683 + + -0.24978322 + + -0.00666493 + + 0.08701718 + + + 0.01926881 + + 0.14511615 + + -0.01574725 + + 0.39710528 + + + -0.35157645 + + 0.48869908 + + 0.3519063 + + -0.21306509 + + + 0.40004694 + + -0.00693274 + + -0.43690896 + + 0.46575224 + + + -0.3386495 + + 0.28560823 + + -0.11992186 + + -0.3841107 + + + 0.02532202 + + -0.03704441 + + 0.27476293 + + 0.2742144 + + + -0.28822362 + + 0.2183528 + + 0.0658052 + + -0.20464629 + + + 0.2228418 + + 0.35666758 + + -0.01196289 + + -0.27206528 + + + 0.49370784 + + -0.37761664 + + -0.4462306 + + 0.47271717 + + + -0.38218856 + + 0.06360751 + + -0.33563244 + + 
-0.04947144 + + + -0.01227283 + + 0.16202927 + + 0.27922666 + + 0.08569479 + + + 0.33986813 + + -0.14089179 + + -0.38342243 + + 0.43166125 + + + -0.15443969 + + 0.21090645 + + 0.4235263 + + -0.3934964 + + + -0.00940305 + + -0.2315566 + + -0.4138875 + + -0.11506236 + + + 0.23220491 + + -0.4435383 + + 0.44961244 + + -0.03749442 + + + 0.14980793 + + -0.3917358 + + 0.45487362 + + 0.35936302", + rows=4*C_base, cols=C_base*1*1) + gamma_bn3_2 = matrix(1, rows=4*C_base, cols=1) + beta_bn3_2 = matrix(0, rows=4*C_base, cols=1) + + weights_block2 = list(W_conv1_2, gamma_bn1_2, beta_bn1_2, W_conv2_2, gamma_bn2_2, beta_bn2_2, + W_conv3_2, gamma_bn3_2, beta_bn3_2) + mode = "train" + + ema_mean_bn1 = matrix(0, rows=C_base, cols=1) + ema_var_bn1 = matrix(0, rows=C_base, cols=1) + ema_mean_bn2 = matrix(0, rows=C_base, cols=1) + ema_var_bn2 = matrix(0, rows=C_base, cols=1) + ema_mean_bn3 = matrix(0, rows=4*C_base, cols=1) + ema_var_bn3 = matrix(0, rows=4*C_base, cols=1) + ema_mean_bn4 = matrix(0, rows=4*C_base, cols=1) + ema_var_bn4 = matrix(0, rows=4*C_base, cols=1) + + emas_block2 = list(ema_mean_bn1, ema_var_bn1, ema_mean_bn2, ema_var_bn2, + ema_mean_bn3, ema_var_bn3, ema_mean_bn4, ema_var_bn4) + + expected_Hout = 2 + expected_Wout = 2 + expected_out = matrix(" 1.3795955 1.667117 + 2.1051257 0.4751625 + + 0. 0. + 2.196381 0. + + 1.6621143 0.62258995 + 0. 1.9935839 + + 0. 0.563171 + 2.068542 0. + + 0.4887283 0. + 0. 0. + + 0.6661688 0.70850194 + 2.4760737 2.0852184 + + 0.24420133 0. + 0. 0.7932879 + + 0. 0.61385596 + 0. 0. + + 0. 0.82111585 + 2.0687509 0. + + 1.6963062 0. + 0. 0. + + 0.8057394 0.61964965 + 0.89105517 2.5202808 + + 1.6423799 2.2449687 + 3.336556 0. + + 0. 0. + 0. 1.5585338 + + 3.2802167 2.9148674 + 1.8678657 0. + + 0. 0.78625363 + 0.49233294 1.973973 + + 0. 0.11497754 + 1.5337045 1.4560605 + + + 0. 0.5596464 + 0. 0. + + 1.0924859 3.3933737 + 0.20856619 0.7022976 + + 1.4003831 0. + 0.45457155 0.6552094 + + 0. 0.6216905 + 1.102113 2.0085845 + + 3.6501603 0.8336022 + 2.219977 2.0842493 + + 0.76798964 1.2647653 + 0. 0. + + 1.0118731 0. + 1.9565775 2.42934 + + 3.240756 0. + 2.0738726 3.3796282 + + 1.3811504 0.30889064 + 0. 0. + + 1.818252 0.9127424 + 1.1968298 0.4844057 + + 1.6940281 0. + 0. 0.10140109 + + 0. 0.2724784 + 0. 0. + + 1.5154634 0. + 2.1841986 2.747264 + + 0. 0.80568683 + 0.36282468 0. + + 1.35923 2.4812453 + 0. 0. + + 1.9370286 2.6119037 + 0. 0. ", + rows=N, cols=4*C_base*expected_Hout*expected_Wout) + + blocks_weights = list(weights_block1, weights_block2) + ema_means_vars = list(emas_block1, emas_block2) + C_in = 2 + [out, Hout, Wout, ema_means_vars_upd] = resnet::bottleneck_reslayer_forward(X, Hin, Win, blocks, + strideh, stridew, C_in, C_base, + blocks_weights, mode, ema_means_vars) + + test_util::check_all_close(out, expected_out, 0.0001) +} + /* * **** Bottleneck Block Value Testing **** @@ -773,3 +1594,22 @@ values_test_bottleneck_forward_1() * in train mode. */ values_test_bottleneck_forward_2() + +/* + * *** Residual Layer Value Testing *** + * A residual layer is a sequence of residual blocks + * which all have the same number of base channels. In + * residual networks, there are 4 different residual layer. + * With this test, we test the correct computation of the + * shape and values of the output by comparing it to PyTorches + * residual layers. We modified the PyTorch implementation to + * extract the residual layer. 
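+ * Only the first block of a layer applies the given
+ * stride and downsamples its identity path; all
+ * subsequent blocks run with stride 1 on the expanded
+ * 4*C_base channels.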
+ */ + +/* + * Test case 1: + * A residual layer forward pass with 2 bottleneck residual + * blocks with a stride of 2 and the default block expansion + * of 4. + */ +values_test_residual_layer_forward() From c0d74000eb307d5b5cc31091171e0378f9cddb8a Mon Sep 17 00:00:00 2001 From: MaximilianTUB Date: Thu, 16 Nov 2023 18:08:56 +0100 Subject: [PATCH 10/15] Generalized reslayer forward pass for bottleneck & basic block --- scripts/nn/networks/resnet.dml | 93 +++++++++++++++++-- .../nn/component/resnet_basic.dml | 7 +- .../nn/component/resnet_bottleneck.dml | 7 +- 3 files changed, 93 insertions(+), 14 deletions(-) diff --git a/scripts/nn/networks/resnet.dml b/scripts/nn/networks/resnet.dml index 7786482df37..b47c545e748 100644 --- a/scripts/nn/networks/resnet.dml +++ b/scripts/nn/networks/resnet.dml @@ -431,6 +431,77 @@ bottleneck_reslayer_forward = function(matrix[double] X, int Hin, int Win, int b } } +reslayer_forward = function(matrix[double] X, int Hin, int Win, + string block_type, int blocks, int strideh, + int stridew, int C_in, int C_base, + list[unknown] blocks_weights, string mode, + list[unknown] ema_means_vars) + return (matrix[double] out, int Hout, int Wout, + list[unknown] ema_means_vars_upd) { + /* + * Executes the forward pass for a sequence of bottleneck + * residual blocks with the same number of base channels, + * i.e. residual layer. + * + * Inputs: + * - X: Inputs, of shape (N, C_in*Hin*Win) + * - Hin: Input height. + * - Win: Input width. + * - blocks: Number of residual blocks (bigger than 0). + * - strideh: Stride height for first conv layer of first block. + * - stridew: Stride width for first conv layer of first block. + * - C_in: Number of input channels. + * - C_base: Number of base channels of res layer. + * - blocks_weights: List of weights of each block. + * -> i: List of weights of block i with the content + * defined in the docs of basic_block_forward(). + * -> length == blocks + * - mode: 'train' or 'test' to indicate if the model is currently + * being trained or tested for badge normalization layers. + * See badge_norm2d.dml docs for more info. + * - ema_means_vars: List of exponential moving averages for mean + * and variance for badge normalization layers of each block. + * -> i: List of EMAs of block i with the content defined + * in the docs of basic_block_forward(). 
+ * -> length == blocks + */ + # default values + mu_bn = 0.1 + epsilon_bn = 1e-05 + + # first block with provided stride + if (block_type == "basic") { + [out, Hout, Wout, emas1_upd] = basic_block_forward(X, + as.list(blocks_weights[1]), + C_in, C_base, Hin, Win, strideh, stridew, + mode, as.list(ema_means_vars[1])) + } else { + [out, Hout, Wout, emas1_upd] = bottleneck_block_forward(X, + as.list(blocks_weights[1]), + C_in, C_base, Hin, Win, strideh, stridew, + mode, as.list(ema_means_vars[1])) + } + ema_means_vars_upd = list(emas1_upd) + + # other blocks + for (i in 2:blocks) { + current_weights = as.list(blocks_weights[i]) + current_emas = as.list(ema_means_vars[i]) + if (block_type == "basic") { + [out, Hout, Wout, current_emas_upd] = basic_block_forward(X=out, + weights=current_weights, C_in=C_base, C_base=C_base, + Hin=Hout, Win=Wout, strideh=1, stridew=1, mode=mode, + ema_means_vars=current_emas) + } else { + [out, Hout, Wout, current_emas_upd] = bottleneck_block_forward(X=out, + weights=current_weights, C_in=C_base*4, + C_base=C_base, Hin=Hout, Win=Wout, strideh=1, + stridew=1, mode=mode, ema_means_vars=current_emas) + } + ema_means_vars_upd = append(ema_means_vars_upd, current_emas_upd) + } +} + resnet_basic_forward = function(matrix[double] X, int Hin, int Win, list[unknown] layer_sizes, list[unknown] model, string mode, @@ -505,6 +576,8 @@ resnet_basic_forward = function(matrix[double] X, int Hin, int Win, mu_bn = 0.1 epsilon_bn = 1e-05 + block_type = "basic" + # extract model params W_conv1 = as.matrix(model[1]) gamma_bn1 = as.matrix(model[2]); beta_bn1 = as.matrix(model[3]) @@ -540,32 +613,36 @@ resnet_basic_forward = function(matrix[double] X, int Hin, int Win, # residual layer 1 block_count = as.integer(as.scalar(layer_sizes[1])) - [out, Hout, Wout, emas1_upd] = basic_reslayer_forward(X=out, Hin=Hout, - Win=Wout, blocks=block_count, strideh=1, + [out, Hout, Wout, emas1_upd] = reslayer_forward(X=out, Hin=Hout, + Win=Wout, block_type=block_type, + blocks=block_count, strideh=1, stridew=1, C_in=C, C_base=64, blocks_weights=weights_reslayer1, mode=mode, ema_means_vars=emas_reslayer1) C = 64 # residual layer 2 block_count = as.integer(as.scalar(layer_sizes[2])) - [out, Hout, Wout, emas2_upd] = basic_reslayer_forward(X=out, Hin=Hout, - Win=Wout, blocks=block_count, strideh=2, + [out, Hout, Wout, emas2_upd] = reslayer_forward(X=out, Hin=Hout, + Win=Wout, block_type=block_type, + blocks=block_count, strideh=2, stridew=2, C_in=C, C_base=128, blocks_weights=weights_reslayer2, mode=mode, ema_means_vars=emas_reslayer2) C = 128 # residual layer 3 block_count = as.integer(as.scalar(layer_sizes[3])) - [out, Hout, Wout, emas3_upd] = basic_reslayer_forward(X=out, Hin=Hout, - Win=Wout, blocks=block_count, strideh=2, + [out, Hout, Wout, emas3_upd] = reslayer_forward(X=out, Hin=Hout, + Win=Wout, block_type=block_type, + blocks=block_count, strideh=2, stridew=2, C_in=C, C_base=256, blocks_weights=weights_reslayer3, mode=mode, ema_means_vars=emas_reslayer3) C = 256 # residual layer 4 block_count = as.integer(as.scalar(layer_sizes[4])) - [out, Hout, Wout, emas4_upd] = basic_reslayer_forward(X=out, Hin=Hout, - Win=Wout, blocks=block_count, strideh=2, + [out, Hout, Wout, emas4_upd] = reslayer_forward(X=out, Hin=Hout, + Win=Wout, block_type=block_type, + blocks=block_count, strideh=2, stridew=2, C_in=C, C_base=512, blocks_weights=weights_reslayer4, mode=mode, ema_means_vars=emas_reslayer4) diff --git a/src/test/scripts/applications/nn/component/resnet_basic.dml 
b/src/test/scripts/applications/nn/component/resnet_basic.dml index 9ac30db3c80..1f64849c48d 100644 --- a/src/test/scripts/applications/nn/component/resnet_basic.dml +++ b/src/test/scripts/applications/nn/component/resnet_basic.dml @@ -534,9 +534,10 @@ values_test_residual_layer_forward = function() { blocks_weights = list(block1_weights, block2_weights) ema_means_vars = list(block1_EMAs, block2_EMAs) - [out, Hout, Wout, ema_means_vars_upd] = resnet::basic_reslayer_forward(X, Hin, Win, blocks, - strideh, stridew, C_in, C_base, - blocks_weights, mode, ema_means_vars) + block_type = "basic" + [out, Hout, Wout, ema_means_vars_upd] = resnet::reslayer_forward(X, Hin, Win, block_type, + blocks, strideh, stridew, C_in, C_base, + blocks_weights, mode, ema_means_vars) test_util::check_all_close(out, expected_out, 0.0001) } diff --git a/src/test/scripts/applications/nn/component/resnet_bottleneck.dml b/src/test/scripts/applications/nn/component/resnet_bottleneck.dml index e69502ddf3c..e8bc6098699 100644 --- a/src/test/scripts/applications/nn/component/resnet_bottleneck.dml +++ b/src/test/scripts/applications/nn/component/resnet_bottleneck.dml @@ -1556,9 +1556,10 @@ values_test_residual_layer_forward = function() { blocks_weights = list(weights_block1, weights_block2) ema_means_vars = list(emas_block1, emas_block2) C_in = 2 - [out, Hout, Wout, ema_means_vars_upd] = resnet::bottleneck_reslayer_forward(X, Hin, Win, blocks, - strideh, stridew, C_in, C_base, - blocks_weights, mode, ema_means_vars) + block_type = "bottleneck" + [out, Hout, Wout, ema_means_vars_upd] = resnet::reslayer_forward(X, Hin, Win, block_type, + blocks, strideh, stridew, C_in, C_base, + blocks_weights, mode, ema_means_vars) test_util::check_all_close(out, expected_out, 0.0001) } From ae83220dca161aa641a904ec7ca58c39838177c7 Mon Sep 17 00:00:00 2001 From: MaximilianTUB Date: Thu, 16 Nov 2023 18:13:03 +0100 Subject: [PATCH 11/15] Removed old reslayer forward passes and upd docs --- scripts/nn/networks/resnet.dml | 128 +++------------------------------ 1 file changed, 11 insertions(+), 117 deletions(-) diff --git a/scripts/nn/networks/resnet.dml b/scripts/nn/networks/resnet.dml index b47c545e748..9b1de648ed2 100644 --- a/scripts/nn/networks/resnet.dml +++ b/scripts/nn/networks/resnet.dml @@ -319,118 +319,6 @@ bottleneck_block_forward = function(matrix[double] X, } } -basic_reslayer_forward = function(matrix[double] X, int Hin, int Win, int blocks, - int strideh, int stridew, int C_in, int C_base, - list[unknown] blocks_weights, string mode, - list[unknown] ema_means_vars) - return (matrix[double] out, int Hout, int Wout, - list[unknown] ema_means_vars_upd) { - /* - * Executes the forward pass for a sequence of residual blocks - * with the same number of base channels, i.e. residual layer. - * - * Inputs: - * - X: Inputs, of shape (N, C_in*Hin*Win) - * - Hin: Input height. - * - Win: Input width. - * - blocks: Number of residual blocks (bigger than 0). - * - strideh: Stride height for first conv layer of first block. - * - stridew: Stride width for first conv layer of first block. - * - C_in: Number of input channels. - * - C_base: Number of base channels of res layer. - * - blocks_weights: List of weights of each block. - * -> i: List of weights of block i with the content - * defined in the docs of basic_block_forward(). - * -> length == blocks - * - mode: 'train' or 'test' to indicate if the model is currently - * being trained or tested for badge normalization layers. - * See badge_norm2d.dml docs for more info. 
- * - ema_means_vars: List of exponential moving averages for mean - * and variance for badge normalization layers of each block. - * -> i: List of EMAs of block i with the content defined - * in the docs of basic_block_forward(). - * -> length == blocks - */ - # default values - mu_bn = 0.1 - epsilon_bn = 1e-05 - - # first block with provided stride - [out, Hout, Wout, emas1_upd] = basic_block_forward(X, as.list(blocks_weights[1]), - C_in, C_base, Hin, Win, strideh, stridew, - mode, as.list(ema_means_vars[1])) - ema_means_vars_upd = list(emas1_upd) - - # other block - for (i in 2:blocks) { - current_weights = as.list(blocks_weights[i]) - current_emas = as.list(ema_means_vars[i]) - [out, Hout, Wout, current_emas_upd] = basic_block_forward(X=out, - weights=current_weights, C_in=C_base, C_base=C_base, - Hin=Hout, Win=Wout, strideh=1, stridew=1, mode=mode, - ema_means_vars=current_emas) - ema_means_vars_upd = append(ema_means_vars_upd, current_emas_upd) - } -} - -bottleneck_reslayer_forward = function(matrix[double] X, int Hin, int Win, int blocks, - int strideh, int stridew, int C_in, int C_base, - list[unknown] blocks_weights, string mode, - list[unknown] ema_means_vars) - return (matrix[double] out, int Hout, int Wout, - list[unknown] ema_means_vars_upd) { - /* - * Executes the forward pass for a sequence of bottleneck - * residual blocks with the same number of base channels, - * i.e. residual layer. - * - * Inputs: - * - X: Inputs, of shape (N, C_in*Hin*Win) - * - Hin: Input height. - * - Win: Input width. - * - blocks: Number of residual blocks (bigger than 0). - * - strideh: Stride height for first conv layer of first block. - * - stridew: Stride width for first conv layer of first block. - * - C_in: Number of input channels. - * - C_base: Number of base channels of res layer. - * - blocks_weights: List of weights of each block. - * -> i: List of weights of block i with the content - * defined in the docs of basic_block_forward(). - * -> length == blocks - * - mode: 'train' or 'test' to indicate if the model is currently - * being trained or tested for badge normalization layers. - * See badge_norm2d.dml docs for more info. - * - ema_means_vars: List of exponential moving averages for mean - * and variance for badge normalization layers of each block. - * -> i: List of EMAs of block i with the content defined - * in the docs of basic_block_forward(). 
- *          -> length == blocks
- */
-    # default values
-    mu_bn = 0.1
-    epsilon_bn = 1e-05
-
-    # first block with provided stride
-    [out, Hout, Wout, emas1_upd] = bottleneck_block_forward(X, as.list(blocks_weights[1]),
-                                        C_in, C_base, Hin, Win, strideh, stridew,
-                                        mode, as.list(ema_means_vars[1]))
-    ema_means_vars_upd = list(emas1_upd)
-
-    # other blocks
-
-    # account for block expansion
-    C_in = 4*C_base
-    for (i in 2:blocks) {
-        current_weights = as.list(blocks_weights[i])
-        current_emas = as.list(ema_means_vars[i])
-        [out, Hout, Wout, current_emas_upd] = bottleneck_block_forward(X=out,
-                weights=current_weights, C_in=C_in, C_base=C_base,
-                Hin=Hout, Win=Wout, strideh=1, stridew=1, mode=mode,
-                ema_means_vars=current_emas)
-        ema_means_vars_upd = append(ema_means_vars_upd, current_emas_upd)
-    }
-}
-
 reslayer_forward = function(matrix[double] X, int Hin, int Win,
                             string block_type, int blocks,
                             int strideh, int stridew, int C_in, int C_base,
@@ -439,14 +327,17 @@ reslayer_forward = function(matrix[double] X, int Hin, int Win,
     return (matrix[double] out, int Hout, int Wout,
             list[unknown] ema_means_vars_upd) {
     /*
-     * Executes the forward pass for a sequence of bottleneck
-     * residual blocks with the same number of base channels,
-     * i.e. residual layer.
+     * Executes the forward pass for a sequence of residual
+     * blocks with the same number of base channels, i.e.
+     * a residual layer.
      *
      * Inputs:
      * - X: Inputs, of shape (N, C_in*Hin*Win)
      * - Hin: Input height.
      * - Win: Input width.
+     * - block_type: 'basic' or 'bottleneck' depending on
+     *      which type of block should be used the residual
+     *      layer.
      * - blocks: Number of residual blocks (bigger than 0).
      * - strideh: Stride height for first conv layer of first block.
      * - stridew: Stride width for first conv layer of first block.
@@ -454,7 +345,9 @@ reslayer_forward = function(matrix[double] X, int Hin, int Win,
      * - C_base: Number of base channels of res layer.
      * - blocks_weights: List of weights of each block.
      *      -> i: List of weights of block i with the content
-     *          defined in the docs of basic_block_forward().
+     *          defined in the docs of basic_block_forward()
+     *          or bottleneck_block_forward() depending on
+     *          the block type.
      *      -> length == blocks
      * - mode: 'train' or 'test' to indicate if the model is currently
      *      being trained or tested for badge normalization layers.
      *      See badge_norm2d.dml docs for more info.
      * - ema_means_vars: List of exponential moving averages for mean
      *      and variance for badge normalization layers of each block.
      *      -> i: List of EMAs of block i with the content defined
-     *          in the docs of basic_block_forward().
+     *          in the docs of basic_block_forward() or
+     *          bottleneck_block_forward() depending on the block type.
      *      -> length == blocks
      */
     # default values
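With the specialized basic/bottleneck layer functions removed above, reslayer_forward is the single entry point for both block types. Below is a minimal, self-contained DML sketch of driving it for a two-block bottleneck layer; all shapes and the random/constant initializations are illustrative only, and it assumes (per the docstrings) that a block uses the projection shortcut exactly when the extra downsample weights are supplied:

    source("scripts/nn/networks/resnet.dml") as resnet

    Cb = 64   # base channels of the layer
    # Block 1 (C_in = 64 -> 4*Cb = 256 channels): includes the projection
    # path, i.e. entries 10-12 (downsample conv + batch norm).
    w1 = list(rand(rows=Cb, cols=64*1*1),                                  # 1: conv 1x1
              matrix(1, rows=Cb, cols=1), matrix(0, rows=Cb, cols=1),      # 2-3: bn 1
              rand(rows=Cb, cols=Cb*3*3),                                  # 4: conv 3x3
              matrix(1, rows=Cb, cols=1), matrix(0, rows=Cb, cols=1),      # 5-6: bn 2
              rand(rows=4*Cb, cols=Cb*1*1),                                # 7: conv 1x1
              matrix(1, rows=4*Cb, cols=1), matrix(0, rows=4*Cb, cols=1),  # 8-9: bn 3
              rand(rows=4*Cb, cols=64*1*1),                                # 10: downsample conv
              matrix(1, rows=4*Cb, cols=1), matrix(0, rows=4*Cb, cols=1))  # 11-12: downsample bn
    e1 = list(matrix(0, rows=Cb, cols=1), matrix(1, rows=Cb, cols=1),      # bn 1 mean/var
              matrix(0, rows=Cb, cols=1), matrix(1, rows=Cb, cols=1),      # bn 2 mean/var
              matrix(0, rows=4*Cb, cols=1), matrix(1, rows=4*Cb, cols=1),  # bn 3 mean/var
              matrix(0, rows=4*Cb, cols=1), matrix(1, rows=4*Cb, cols=1))  # downsample bn
    # Block 2 (C_in = 256 -> 256 channels): identity shortcut, no downsample entries.
    w2 = list(rand(rows=Cb, cols=4*Cb*1*1),
              matrix(1, rows=Cb, cols=1), matrix(0, rows=Cb, cols=1),
              rand(rows=Cb, cols=Cb*3*3),
              matrix(1, rows=Cb, cols=1), matrix(0, rows=Cb, cols=1),
              rand(rows=4*Cb, cols=Cb*1*1),
              matrix(1, rows=4*Cb, cols=1), matrix(0, rows=4*Cb, cols=1))
    e2 = list(matrix(0, rows=Cb, cols=1), matrix(1, rows=Cb, cols=1),
              matrix(0, rows=Cb, cols=1), matrix(1, rows=Cb, cols=1),
              matrix(0, rows=4*Cb, cols=1), matrix(1, rows=4*Cb, cols=1))

    X = rand(rows=4, cols=64*56*56)   # N = 4, C_in = 64, Hin = Win = 56
    [out, Hout, Wout, emas_upd] = resnet::reslayer_forward(X=X, Hin=56, Win=56,
            block_type="bottleneck", blocks=2, strideh=1, stridew=1,
            C_in=64, C_base=Cb, blocks_weights=list(w1, w2), mode="train",
            ema_means_vars=list(e1, e2))
    # out has shape (4, 256*56*56); emas_upd mirrors list(e1, e2)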
From df20792d73363095111dd070d7e840a11dc13d29 Mon Sep 17 00:00:00 2001
From: MaximilianTUB
Date: Fri, 17 Nov 2023 13:32:44 +0100
Subject: [PATCH 12/15] Generalized forward pass for bottleneck and basic blocks

---
 scripts/nn/networks/resnet.dml | 176 ++++++++++++++++++++++++++++++++-
 1 file changed, 173 insertions(+), 3 deletions(-)

diff --git a/scripts/nn/networks/resnet.dml b/scripts/nn/networks/resnet.dml
index 9b1de648ed2..f7721abd228 100644
--- a/scripts/nn/networks/resnet.dml
+++ b/scripts/nn/networks/resnet.dml
@@ -336,8 +336,8 @@ reslayer_forward = function(matrix[double] X, int Hin, int Win,
      * - Hin: Input height.
      * - Win: Input width.
      * - block_type: 'basic' or 'bottleneck' depending on
-     *      which type of block should be used the residual
-     *      layer.
+     *      which type of block should be used for the
+     *      residual layer.
      * - blocks: Number of residual blocks (bigger than 0).
      * - strideh: Stride height for first conv layer of first block.
      * - stridew: Stride width for first conv layer of first block.
@@ -404,7 +404,8 @@ resnet_basic_forward = function(matrix[double] X, int Hin, int Win,
     /*
      * Forward pass of the ResNet 18 and 34 model as introduced
      * in "Deep Residual Learning for Image Recognition" by
-     * Kaiming He et. al. and inspired by the PyTorch.
+     * Kaiming He et al. and inspired by the PyTorch
+     * implementation.
      *
      * Inputs:
      * - X: Inputs, of shape (N, C_in*Hin*Win).
@@ -550,3 +551,172 @@ resnet_basic_forward = function(matrix[double] X, int Hin, int Win,
     ema_means_vars_upd = list(ema_mean_bn1_upd, ema_var_bn1_upd,
                               emas1_upd, emas2_upd, emas3_upd, emas4_upd)
 }
+
+resnet_forward = function(matrix[double] X, int Hin, int Win,
+                          string block_type, list[unknown] layer_sizes,
+                          list[unknown] model, string mode,
+                          list[unknown] ema_means_vars)
+    return (matrix[double] out, list[unknown] ema_means_vars_upd) {
+    /*
+     * Forward pass of the ResNet as introduced in
+     * "Deep Residual Learning for Image Recognition" by
+     * Kaiming He et al. and inspired by the PyTorch
+     * implementation.
+     *
+     * Inputs:
+     * - X: Inputs, of shape (N, C_in*Hin*Win).
+     *      C_in = 3 is expected.
+     * - Hin: Input height.
+     * - Win: Input width.
+     * - block_type: 'basic' or 'bottleneck' depending on
+     *      which type of block should be used for the
+     *      residual network.
+     * - layer_sizes: List of the sizes of each of
+     *      the 4 residual layers.
+     *      For ResNet18: [2, 2, 2, 2], RN34: [3, 4, 6, 3],
+     *      RN50: [3, 4, 6, 3], RN101: [3, 4, 23, 3],
+     *      RN152: [3, 8, 36, 3]
+     * - model: Weights and bias matrices of the model
+     *      with the following order/content:
+     *      -> 1: Weights of conv 1 7x7, of shape (64, 3*7*7)
+     *      -> 2: Weights of batch norm 1, of shape (64, 1).
+     *      -> 3: Bias of batch norm 1, of shape (64, 1).
+     *      -> 4: List of weights for first residual layer
+     *            with 64 base channels.
+     *      -> 5: List of weights for second residual layer
+     *            with 128 base channels.
+     *      -> 6: List of weights for third residual layer
+     *            with 256 base channels.
+     *      -> 7: List of weights for fourth residual layer
+     *            with 512 base channels.
+     *      The lists for residual layers 1, 2, 3 & 4 have
+     *      the content/order:
+     *      -> i: List of weights for residual block i.
+     *            with i in {1, ..., layer_sizes[layer]}
+     *      Each list of weights for a residual block
+     *      must follow the same order as defined in
+     *      the documentation of basic_block_forward()
+     *      or bottleneck_block_forward() depending
+     *      on the block type.
+     *      -> 8: Weights of fully connected layer, of shape (C_out, 1000)
+     *            where C_out = 512 for basic block type and C_out = 2048
+     *            for bottleneck block type.
+     *      -> 9: Bias of fully connected layer, of shape (1, 1000)
+     * - mode: 'train' or 'test' to indicate if the model is currently
+     *      being trained or tested for batch normalization layers.
+     *      See batch_norm2d.dml docs for more info.
+     * - ema_means_vars: List of exponential moving averages for mean
+     *      and variance for batch normalization layers.
+     *      -> 1: EMA for mean of batch norm 1, of shape (64, 1).
+     *      -> 2: EMA for variance of batch norm 1, of shape (64, 1).
+     *      -> 3: List of EMA means and vars for residual layer 1.
+     *      -> 4: List of EMA means and vars for residual layer 2.
+     *      -> 5: List of EMA means and vars for residual layer 3.
+     *      -> 6: List of EMA means and vars for residual layer 4.
+     *      Lists for EMAs of layer 1, 2, 3 & 4 must have the
+     *      following order:
+     *      -> i: List of EMA means and vars for residual block i.
+     *            with i in {1, ..., layer_sizes[layer]}
+     *      Each list of EMAs for a residual block
+     *      must follow the same order as defined in
+     *      the documentation of basic_block_forward()
+     *      or bottleneck_block_forward().
+     * - NOTICE: The lists of the first blocks for layer 2, 3 and 4
+     *      must include weights and EMAs for 1 extra conv layer
+     *      and a batch norm layer for the downsampling on the
+     *      identity path.
+     *
+     * Outputs:
+     * - out: Outputs, of shape (N, 1000)
+     * - ema_means_vars_upd: List of updated exponential moving averages
+     *      for mean and variance of batch normalization layers. It follows
+     *      the same exact structure as the input EMAs list.
+     */
+    # default values
+    mu_bn = 0.1
+    epsilon_bn = 1e-05
+
+    if (block_type == "basic") {
+        block_expansion = 1
+    } else {
+        block_expansion = 4
+    }
+
+    # extract model params
+    W_conv1 = as.matrix(model[1])
+    gamma_bn1 = as.matrix(model[2]); beta_bn1 = as.matrix(model[3])
+    weights_reslayer1 = as.list(model[4])
+    weights_reslayer2 = as.list(model[5])
+    weights_reslayer3 = as.list(model[6])
+    weights_reslayer4 = as.list(model[7])
+    W_fc = as.matrix(model[8])
+    b_fc = as.matrix(model[9])
+    ema_mean_bn1 = as.matrix(ema_means_vars[1]); ema_var_bn1 = as.matrix(ema_means_vars[2])
+    emas_reslayer1 = as.list(ema_means_vars[3])
+    emas_reslayer2 = as.list(ema_means_vars[4])
+    emas_reslayer3 = as.list(ema_means_vars[5])
+    emas_reslayer4 = as.list(ema_means_vars[6])
+
+    # Convolutional 7x7 layer
+    C = 64
+    b_conv1 = matrix(0, rows=C, cols=1)
+    [out, Hout, Wout] = conv2d::forward(X=X, W=W_conv1, b=b_conv1, C=3,
+                                        Hin=Hin, Win=Win, Hf=7, Wf=7, strideh=2,
+                                        stridew=2, padh=3, padw=3)
+    # Batch Normalization
+    [out, ema_mean_bn1_upd, ema_var_bn1_upd, c_mean, c_var] = bn2d::forward(X=out,
+                    gamma=gamma_bn1, beta=beta_bn1, C=C, Hin=Hout,
+                    Win=Wout, mode=mode, ema_mean=ema_mean_bn1,
+                    ema_var=ema_var_bn1, mu=mu_bn,
+                    epsilon=epsilon_bn)
+    # ReLU
+    out = relu::forward(X=out)
+    # Max Pooling 3x3
+    [out, Hout, Wout] = mp2d::forward(X=out, C=C, Hin=Hout, Win=Wout, Hf=3,
+                                      Wf=3, strideh=2, stridew=2, padh=1, padw=1)
+
+    # residual layer 1
+    block_count = as.integer(as.scalar(layer_sizes[1]))
+    [out, Hout, Wout, emas1_upd] = reslayer_forward(X=out, Hin=Hout,
+                    Win=Wout, block_type=block_type,
+                    blocks=block_count, strideh=1,
+                    stridew=1, C_in=C, C_base=64,
+                    blocks_weights=weights_reslayer1, mode=mode,
+                    ema_means_vars=emas_reslayer1)
+    C = 64 * block_expansion
+    # residual layer 2
+    block_count = as.integer(as.scalar(layer_sizes[2]))
+    [out, Hout, Wout, emas2_upd] = reslayer_forward(X=out, Hin=Hout,
+                    Win=Wout, block_type=block_type,
+                    blocks=block_count, strideh=2,
+                    stridew=2, C_in=C, C_base=128,
+                    blocks_weights=weights_reslayer2, mode=mode,
+                    ema_means_vars=emas_reslayer2)
+    C = 128 * block_expansion
+    # residual layer 3
+    block_count = as.integer(as.scalar(layer_sizes[3]))
+    [out, Hout, Wout, emas3_upd] = reslayer_forward(X=out, Hin=Hout,
+                    Win=Wout, block_type=block_type,
+                    blocks=block_count, strideh=2,
+                    stridew=2, C_in=C, C_base=256,
+                    blocks_weights=weights_reslayer3, mode=mode,
+                    ema_means_vars=emas_reslayer3)
+    C = 256 * block_expansion
+    # residual layer 4
+    block_count = as.integer(as.scalar(layer_sizes[4]))
+    [out, Hout, Wout, emas4_upd] = reslayer_forward(X=out, Hin=Hout,
+                    Win=Wout, block_type=block_type,
+                    blocks=block_count, strideh=2,
+                    stridew=2, C_in=C, C_base=512,
+                    blocks_weights=weights_reslayer4, mode=mode,
+                    ema_means_vars=emas_reslayer4)
+    C = 512 * block_expansion
+
+    # Global Average Pooling
+    [out, Hout, Wout] = ap2d::forward(X=out, C=C, Hin=Hout, Win=Wout)
+    # Affine
+    out = fc::forward(X=out, W=W_fc, b=b_fc)
+
+    ema_means_vars_upd = list(ema_mean_bn1_upd, ema_var_bn1_upd,
+                              emas1_upd, emas2_upd, emas3_upd, emas4_upd)
+}
From 75e629b865284a12166ed55136c0a398a1aa004f Mon Sep 17 00:00:00 2001
From: MaximilianTUB
Date: Fri, 17 Nov 2023 14:53:25 +0100
Subject: [PATCH 13/15] ResNet50

---
 scripts/nn/networks/resnet18.dml |   5 +-
 scripts/nn/networks/resnet34.dml |   5 +-
 scripts/nn/networks/resnet50.dml | 101 +++++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+), 4 deletions(-)
 create mode 100644 scripts/nn/networks/resnet50.dml

diff --git a/scripts/nn/networks/resnet18.dml b/scripts/nn/networks/resnet18.dml
index f095211e501..8a807921c08 100644
--- a/scripts/nn/networks/resnet18.dml
+++ b/scripts/nn/networks/resnet18.dml
@@ -89,6 +89,7 @@ forward = function(matrix[double] X, int Hin, int Win,
      * the same exact structure as the input EMAs list.
      */
     layer_sizes = list(2, 2, 2, 2)
-    [out, ema_means_vars_upd] = resnet::resnet_basic_forward(X, Hin, Win,
-            layer_sizes, model, mode, ema_means_vars)
+    block_type = "basic"
+    [out, ema_means_vars_upd] = resnet::resnet_forward(X, Hin, Win, block_type,
+            layer_sizes, model, mode, ema_means_vars)
 }
diff --git a/scripts/nn/networks/resnet34.dml b/scripts/nn/networks/resnet34.dml
index a00128362fb..a7e42a31a9b 100644
--- a/scripts/nn/networks/resnet34.dml
+++ b/scripts/nn/networks/resnet34.dml
@@ -87,6 +87,7 @@ forward = function(matrix[double] X, int Hin, int Win,
      * the same exact structure as the input EMAs list.
      */
     layer_sizes = list(3, 4, 6, 3)
-    [out, ema_means_vars_upd] = resnet::resnet_basic_forward(X, Hin, Win,
-            layer_sizes, model, mode, ema_means_vars)
+    block_type = "basic"
+    [out, ema_means_vars_upd] = resnet::resnet_forward(X, Hin, Win, block_type,
+            layer_sizes, model, mode, ema_means_vars)
 }
diff --git a/scripts/nn/networks/resnet50.dml b/scripts/nn/networks/resnet50.dml
new file mode 100644
index 00000000000..b47e4479b9d
--- /dev/null
+++ b/scripts/nn/networks/resnet50.dml
@@ -0,0 +1,101 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
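As a worked example of the spatial bookkeeping in resnet_forward above: with the usual formula Hout = (Hin + 2*pad - Hf) %/% stride + 1, a 224x224 input shrinks to 112 after the 7x7 stride-2 stem, to 56 after the 3x3 stride-2 max pool, stays at 56 through residual layer 1, and halves in layers 2-4 to 28, 14 and 7 (both block types downsample with a stride-2, pad-1 3x3 convolution, so the arithmetic is independent of block_type). Global average pooling then reduces 7x7 to 1x1, so the affine layer maps C = 512 * block_expansion features to 1000 outputs. A tiny illustrative DML check:

    H = 224
    H = (H + 2*3 - 7) %/% 2 + 1      # conv 7x7, stride 2, pad 3     -> 112
    H = (H + 2*1 - 3) %/% 2 + 1      # max pool 3x3, stride 2, pad 1 -> 56
    for (i in 1:3) {
        H = (H + 2*1 - 3) %/% 2 + 1  # layers 2-4 each halve         -> 28, 14, 7
    }
    print(H)  # 7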
+# +#------------------------------------------------------------- + +source("scripts/nn/networks/resnet.dml") as resnet + +forward = function(matrix[double] X, int Hin, int Win, + list[unknown] model, string mode, + list[unknown] ema_means_vars) + return (matrix[double] out, list[unknown] ema_means_vars_upd) { + /* + * Forward pass of the ResNet50 as introduced in + * "Deep Residual Learning for Image Recognition" by + * Kaiming He et. al. and inspired by the PyTorch + * implementation. + * + * Inputs: + * - X: Inputs, of shape (N, C_in*Hin*Win). + * C_in = 3 is expected. + * - Hin: Input height. + * - Win: Input width. + * - layer_sizes: List of the sizes of each of + * the 4 residual layers. + * For ResNet18: [2, 2, 2, 2], RN34: [3, 4, 6, 3], + * RN50: [3, 4, 6, 3], RN101: [3, 4, 23, 3], + * RN152: [3, 8, 36, 3] + * - model: Weights and bias matrices of the model + * with the following order/content: + * -> 1: Weights of conv 1 7x7, of shape (64, 3*7*7) + * -> 2: Weights of batch norm 1, of shape (64, 1). + * -> 3: Bias of batch norm 1, of shape (64, 1). + * -> 4: List of weights for first residual layer + * with 64 base channels. + * -> 5: List of weights for second residual layer + * with 128 base channels. + * -> 6: List of weights for third residual layer + * with 256 base channels. + * -> 7: List of weights for fourth residual layer + * with 512 base channels. + * List of residual layers 1, 2, 3 & 4 have + * the content/order: + * -> i: List of weights for residual block i. + * with i in {1, ..., layer_sizes[layer]} + * Each list of weights for a residual block + * must follow the same order as defined in + * the documentation of bottleneck_block_forward(). + * -> 8: Weights of fully connected layer, of shape (C_out, 1000) + * where C_out = 512 for basic block type and C_out = 2048 + * for bottleneck block type. + * -> 9: Bias of fully connected layer, of shape (1, 1000) + * - mode: 'train' or 'test' to indicate if the model is currently + * being trained or tested for badge normalization layers. + * See badge_norm2d.dml docs for more info. + * - ema_means_vars: List of exponential moving averages for mean + * and variance for badge normalization layers. + * -> 1: EMA for mean of badge norm 1, of shape (64, 1). + * -> 2: EMA for variance of badge norm 1, of shape (64, 1). + * -> 3: List of EMA means and vars for residual layer 1. + * -> 4: List of EMA means and vars for residual layer 2. + * -> 5: List of EMA means and vars for residual layer 3. + * -> 6: List of EMA means and vars for residual layer 4. + * Lists for EMAs of layer 1, 2, 3 & 4 must have the + * following order: + * -> i: List of EMA means and vars for residual block i. + * with i in {1, ..., layer_sizes[layer]} + * Each list of EMAs for a residual block + * must follow the same order as defined in + * the documentation bottleneck_block_forward(). + * - NOTICE: The lists of the first blocks for layer 2, 3 and 4 + * must include weights and EMAs for 1 extra conv layer + * and a batch norm layer for the downsampling on the + * identity path. + * + * Outputs: + * - out: Outputs, of shape (N, 1000) + * - ema_means_vars_upd: List of updated exponential moving averages + * for mean and variance of badge normalization layers. It follows + * the same exact structure as the input EMAs list. 
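To show the intended call pattern of the new wrapper, a short sketch follows; `model` and `emas` stand for fully assembled weight and EMA lists with the nesting documented in the docstring above, and are assumed to be prepared elsewhere (e.g. by an initializer or loaded from a checkpoint):

    source("scripts/nn/networks/resnet50.dml") as resnet50

    N = 8
    X = rand(rows=N, cols=3*224*224)   # illustrative random batch
    # `model` (entries 1-9) and `emas` (entries 1-6) assumed prepared elsewhere
    [scores, emas_upd] = resnet50::forward(X, 224, 224, model, "train", emas)
    print(nrow(scores))   # 8
    print(ncol(scores))   # 1000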
From 64307c89b0751db1d20d521164476b1f9e653327 Mon Sep 17 00:00:00 2001
From: MaximilianTUB
Date: Thu, 7 Dec 2023 16:43:05 +0100
Subject: [PATCH 14/15] ResNet101 & ResNet152

---
 scripts/nn/networks/resnet101.dml | 101 ++++++++++++++++++++++++++++++
 scripts/nn/networks/resnet152.dml | 101 ++++++++++++++++++++++++++++++
 2 files changed, 202 insertions(+)
 create mode 100644 scripts/nn/networks/resnet101.dml
 create mode 100644 scripts/nn/networks/resnet152.dml

diff --git a/scripts/nn/networks/resnet101.dml b/scripts/nn/networks/resnet101.dml
new file mode 100644
index 00000000000..e91905b0c0e
--- /dev/null
+++ b/scripts/nn/networks/resnet101.dml
@@ -0,0 +1,101 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("scripts/nn/networks/resnet.dml") as resnet
+
+forward = function(matrix[double] X, int Hin, int Win,
+                   list[unknown] model, string mode,
+                   list[unknown] ema_means_vars)
+    return (matrix[double] out, list[unknown] ema_means_vars_upd) {
+    /*
+     * Forward pass of the ResNet101 as introduced in
+     * "Deep Residual Learning for Image Recognition" by
+     * Kaiming He et al. and inspired by the PyTorch
+     * implementation.
+     *
+     * Inputs:
+     * - X: Inputs, of shape (N, C_in*Hin*Win).
+     *      C_in = 3 is expected.
+     * - Hin: Input height.
+     * - Win: Input width.
+     * - model: Weights and bias matrices of the model
+     *      with the following order/content:
+     *      -> 1: Weights of conv 1 7x7, of shape (64, 3*7*7)
+     *      -> 2: Weights of batch norm 1, of shape (64, 1).
+     *      -> 3: Bias of batch norm 1, of shape (64, 1).
+     *      -> 4: List of weights for first residual layer
+     *            with 64 base channels.
+     *      -> 5: List of weights for second residual layer
+     *            with 128 base channels.
+     *      -> 6: List of weights for third residual layer
+     *            with 256 base channels.
+     *      -> 7: List of weights for fourth residual layer
+     *            with 512 base channels.
+     *      The lists for residual layers 1, 2, 3 & 4 have
+     *      the content/order:
+     *      -> i: List of weights for residual block i.
+     *            with i in {1, ..., layer_sizes[layer]}
+     *      Each list of weights for a residual block
+     *      must follow the same order as defined in
+     *      the documentation of bottleneck_block_forward().
+     *      -> 8: Weights of fully connected layer, of shape (2048, 1000).
+     *      -> 9: Bias of fully connected layer, of shape (1, 1000)
+     * - mode: 'train' or 'test' to indicate if the model is currently
+     *      being trained or tested for batch normalization layers.
+     *      See batch_norm2d.dml docs for more info.
+     * - ema_means_vars: List of exponential moving averages for mean
+     *      and variance for batch normalization layers.
+     *      -> 1: EMA for mean of batch norm 1, of shape (64, 1).
+     *      -> 2: EMA for variance of batch norm 1, of shape (64, 1).
+     *      -> 3: List of EMA means and vars for residual layer 1.
+     *      -> 4: List of EMA means and vars for residual layer 2.
+     *      -> 5: List of EMA means and vars for residual layer 3.
+     *      -> 6: List of EMA means and vars for residual layer 4.
+     *      Lists for EMAs of layer 1, 2, 3 & 4 must have the
+     *      following order:
+     *      -> i: List of EMA means and vars for residual block i.
+     *            with i in {1, ..., layer_sizes[layer]}
+     *      Each list of EMAs for a residual block
+     *      must follow the same order as defined in
+     *      the documentation of bottleneck_block_forward().
+     * - NOTICE: The lists of the first blocks for layer 2, 3 and 4
+     *      must include weights and EMAs for 1 extra conv layer
+     *      and a batch norm layer for the downsampling on the
+     *      identity path.
+     *
+     * Outputs:
+     * - out: Outputs, of shape (N, 1000)
+     * - ema_means_vars_upd: List of updated exponential moving averages
+     *      for mean and variance of batch normalization layers. It follows
+     *      the same exact structure as the input EMAs list.
+     */
+    layer_sizes = list(3, 4, 23, 3)
+    block_type = "bottleneck"
+    [out, ema_means_vars_upd] = resnet::resnet_forward(X, Hin, Win, block_type,
+            layer_sizes, model, mode, ema_means_vars)
+}
diff --git a/scripts/nn/networks/resnet152.dml b/scripts/nn/networks/resnet152.dml
new file mode 100644
index 00000000000..20f0be86240
--- /dev/null
+++ b/scripts/nn/networks/resnet152.dml
@@ -0,0 +1,101 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+source("scripts/nn/networks/resnet.dml") as resnet
+
+forward = function(matrix[double] X, int Hin, int Win,
+                   list[unknown] model, string mode,
+                   list[unknown] ema_means_vars)
+    return (matrix[double] out, list[unknown] ema_means_vars_upd) {
+    /*
+     * Forward pass of the ResNet50 as introduced in
+     * "Deep Residual Learning for Image Recognition" by
+     * Kaiming He et al. and inspired by the PyTorch
+     * implementation.
+     *
+     * Inputs:
+     * - X: Inputs, of shape (N, C_in*Hin*Win).
+     *      C_in = 3 is expected.
+     * - Hin: Input height.
+     * - Win: Input width.
+     * - model: Weights and bias matrices of the model
+     *      with the following order/content:
+     *      -> 1: Weights of conv 1 7x7, of shape (64, 3*7*7)
+     *      -> 2: Weights of batch norm 1, of shape (64, 1).
+     *      -> 3: Bias of batch norm 1, of shape (64, 1).
+     *      -> 4: List of weights for first residual layer
+     *            with 64 base channels.
+     *      -> 5: List of weights for second residual layer
+     *            with 128 base channels.
+     *      -> 6: List of weights for third residual layer
+     *            with 256 base channels.
+     *      -> 7: List of weights for fourth residual layer
+     *            with 512 base channels.
+     *      The lists for residual layers 1, 2, 3 & 4 have
+     *      the content/order:
+     *      -> i: List of weights for residual block i.
+     *            with i in {1, ..., layer_sizes[layer]}
+     *      Each list of weights for a residual block
+     *      must follow the same order as defined in
+     *      the documentation of bottleneck_block_forward().
+     *      -> 8: Weights of fully connected layer, of shape (2048, 1000).
+     *      -> 9: Bias of fully connected layer, of shape (1, 1000)
+     * - mode: 'train' or 'test' to indicate if the model is currently
+     *      being trained or tested for batch normalization layers.
+     *      See batch_norm2d.dml docs for more info.
+     * - ema_means_vars: List of exponential moving averages for mean
+     *      and variance for batch normalization layers.
+     *      -> 1: EMA for mean of batch norm 1, of shape (64, 1).
+     *      -> 2: EMA for variance of batch norm 1, of shape (64, 1).
+     *      -> 3: List of EMA means and vars for residual layer 1.
+     *      -> 4: List of EMA means and vars for residual layer 2.
+     *      -> 5: List of EMA means and vars for residual layer 3.
+     *      -> 6: List of EMA means and vars for residual layer 4.
+     *      Lists for EMAs of layer 1, 2, 3 & 4 must have the
+     *      following order:
+     *      -> i: List of EMA means and vars for residual block i.
+     *            with i in {1, ..., layer_sizes[layer]}
+     *      Each list of EMAs for a residual block
+     *      must follow the same order as defined in
+     *      the documentation of bottleneck_block_forward().
+     * - NOTICE: The lists of the first blocks for layer 2, 3 and 4
+     *      must include weights and EMAs for 1 extra conv layer
+     *      and a batch norm layer for the downsampling on the
+     *      identity path.
+     *
+     * Outputs:
+     * - out: Outputs, of shape (N, 1000)
+     * - ema_means_vars_upd: List of updated exponential moving averages
+     *      for mean and variance of batch normalization layers. It follows
+     *      the same exact structure as the input EMAs list.
+     */
+    layer_sizes = list(3, 8, 36, 3)
+    block_type = "bottleneck"
+    [out, ema_means_vars_upd] = resnet::resnet_forward(X, Hin, Win, block_type,
+            layer_sizes, model, mode, ema_means_vars)
+}
From c6863bdd59e0306c4046cb4b08a6f55159c76a33 Mon Sep 17 00:00:00 2001
From: MaximilianTUB
Date: Thu, 7 Dec 2023 16:52:29 +0100
Subject: [PATCH 15/15] Updated docs and removed deprecated function

---
 scripts/nn/networks/resnet.dml    | 156 ------------------------------
 scripts/nn/networks/resnet101.dml |   3 +-
 scripts/nn/networks/resnet152.dml |   5 +-
 scripts/nn/networks/resnet50.dml  |   5 +-
 4 files changed, 8 insertions(+), 161 deletions(-)

diff --git a/scripts/nn/networks/resnet.dml b/scripts/nn/networks/resnet.dml
index f7721abd228..d440e6a3432 100644
--- a/scripts/nn/networks/resnet.dml
+++ b/scripts/nn/networks/resnet.dml
@@ -396,162 +396,6 @@ reslayer_forward = function(matrix[double] X, int Hin, int Win,
     }
 }
 
-resnet_basic_forward = function(matrix[double] X, int Hin, int Win,
-                                list[unknown] layer_sizes,
-                                list[unknown] model, string mode,
-                                list[unknown] ema_means_vars)
-    return (matrix[double] out, list[unknown] ema_means_vars_upd) {
-    /*
-     * Forward pass of the ResNet 18 and 34 model as introduced
-     * in "Deep Residual Learning for Image Recognition" by
-     * Kaiming He et al. and inspired by the PyTorch
-     * implementation.
-     *
-     * Inputs:
-     * - X: Inputs, of shape (N, C_in*Hin*Win).
-     *      C_in = 3 is expected.
-     * - Hin: Input height.
-     * - Win: Input width.
-     * - layer_sizes: List of the sizes of each of
-     *      the 4 residual layers.
-     *      For ResNet18: [2, 2, 2, 2]
-     *      For ResNet34: [3, 4, 6, 3]
-     * - model: Weights and bias matrices of the model
-     *      with the following order/content:
-     *      -> 1: Weights of conv 1 7x7, of shape (64, 3*7*7)
-     *      -> 2: Weights of batch norm 1, of shape (64, 1).
-     *      -> 3: Bias of batch norm 1, of shape (64, 1).
-     *      -> 4: List of weights for first residual layer
-     *            with 64 base channels.
-     *      -> 5: List of weights for second residual layer
-     *            with 128 base channels.
-     *      -> 6: List of weights for third residual layer
-     *            with 256 base channels.
-     *      -> 7: List of weights for fourth residual layer
-     *            with 512 base channels.
-     *      List of residual layers 1, 2, 3 & 4 have
-     *      the content/order:
-     *      -> i: List of weights for residual block i.
-     *            with i in {1, ..., layer_sizes[layer]}
-     *      Each list of weights for a residual block
-     *      must follow the same order as defined in
-     *      the documentation of basic_block_forward().
-     *      -> 8: Weights of fully connected layer, of shape (512, 1000)
-     *      -> 9: Bias of fully connected layer, of shape (1, 1000)
-     * - mode: 'train' or 'test' to indicate if the model is currently
-     *      being trained or tested for badge normalization layers.
-     *      See badge_norm2d.dml docs for more info.
-     * - ema_means_vars: List of exponential moving averages for mean
-     *      and variance for badge normalization layers.
-     *      -> 1: EMA for mean of badge norm 1, of shape (64, 1).
-     *      -> 2: EMA for variance of badge norm 1, of shape (64, 1).
-     *      -> 3: List of EMA means and vars for residual layer 1.
-     *      -> 4: List of EMA means and vars for residual layer 2.
-     *      -> 5: List of EMA means and vars for residual layer 3.
-     *      -> 6: List of EMA means and vars for residual layer 4.
-     *      Lists for EMAs of layer 1, 2, 3 & 4 must have the
-     *      following order:
-     *      -> i: List of EMA means and vars for residual block i.
- * with i in {1, ..., layer_sizes[layer]} - * Each list of EMAs for a residual block - * must follow the same order as defined in - * the documentation of basic_block_forward(). - * - NOTICE: The lists of the first blocks for layer 2, 3 and 4 - * must include weights and EMAs for 1 extra conv layer - * and a batch norm layer for the downsampling on the - * identity path. - * - * Outputs: - * - out: Outputs, of shape (N, 1000) - * - ema_means_vars_upd: List of updated exponential moving averages - * for mean and variance of badge normalization layers. It follows - * the same exact structure as the input EMAs list. - */ - # default values - mu_bn = 0.1 - epsilon_bn = 1e-05 - - block_type = "basic" - - # extract model params - W_conv1 = as.matrix(model[1]) - gamma_bn1 = as.matrix(model[2]); beta_bn1 = as.matrix(model[3]) - weights_reslayer1 = as.list(model[4]) - weights_reslayer2 = as.list(model[5]) - weights_reslayer3 = as.list(model[6]) - weights_reslayer4 = as.list(model[7]) - W_fc = as.matrix(model[8]) - b_fc = as.matrix(model[9]) - ema_mean_bn1 = as.matrix(ema_means_vars[1]); ema_var_bn1 = as.matrix(ema_means_vars[2]) - emas_reslayer1 = as.list(ema_means_vars[3]) - emas_reslayer2 = as.list(ema_means_vars[4]) - emas_reslayer3 = as.list(ema_means_vars[5]) - emas_reslayer4 = as.list(ema_means_vars[6]) - - # Convolutional 7x7 layer - C = 64 - b_conv1 = matrix(0, rows=C, cols=1) - [out, Hout, Wout] = conv2d::forward(X=X, W=W_conv1, b=b_conv1, C=3, - Hin=Hin, Win=Win, Hf=7, Wf=7, strideh=2, - stridew=2, padh=3, padw=3) - # Batch Normalization - [out, ema_mean_bn1_upd, ema_var_bn1_upd, c_mean, c_var] = bn2d::forward(X=out, - gamma=gamma_bn1, beta=beta_bn1, C=C, Hin=Hout, - Win=Wout, mode=mode, ema_mean=ema_mean_bn1, - ema_var=ema_var_bn1, mu=mu_bn, - epsilon=epsilon_bn) - # ReLU - out = relu::forward(X=out) - # Max Pooling 3x3 - [out, Hout, Wout] = mp2d::forward(X=out, C=C, Hin=Hout, Win=Wout, Hf=3, - Wf=3, strideh=2, stridew=2, padh=1, padw=1) - - # residual layer 1 - block_count = as.integer(as.scalar(layer_sizes[1])) - [out, Hout, Wout, emas1_upd] = reslayer_forward(X=out, Hin=Hout, - Win=Wout, block_type=block_type, - blocks=block_count, strideh=1, - stridew=1, C_in=C, C_base=64, - blocks_weights=weights_reslayer1, mode=mode, - ema_means_vars=emas_reslayer1) - C = 64 - # residual layer 2 - block_count = as.integer(as.scalar(layer_sizes[2])) - [out, Hout, Wout, emas2_upd] = reslayer_forward(X=out, Hin=Hout, - Win=Wout, block_type=block_type, - blocks=block_count, strideh=2, - stridew=2, C_in=C, C_base=128, - blocks_weights=weights_reslayer2, mode=mode, - ema_means_vars=emas_reslayer2) - C = 128 - # residual layer 3 - block_count = as.integer(as.scalar(layer_sizes[3])) - [out, Hout, Wout, emas3_upd] = reslayer_forward(X=out, Hin=Hout, - Win=Wout, block_type=block_type, - blocks=block_count, strideh=2, - stridew=2, C_in=C, C_base=256, - blocks_weights=weights_reslayer3, mode=mode, - ema_means_vars=emas_reslayer3) - C = 256 - # residual layer 4 - block_count = as.integer(as.scalar(layer_sizes[4])) - [out, Hout, Wout, emas4_upd] = reslayer_forward(X=out, Hin=Hout, - Win=Wout, block_type=block_type, - blocks=block_count, strideh=2, - stridew=2, C_in=C, C_base=512, - blocks_weights=weights_reslayer4, mode=mode, - ema_means_vars=emas_reslayer4) - C = 512 - - # Global Average Pooling - [out, Hout, Wout] = ap2d::forward(X=out, C=C, Hin=Hout, Win=Wout) - # Affine - out = fc::forward(X=out, W=W_fc, b=b_fc) - - ema_means_vars_upd = list(ema_mean_bn1_upd, ema_var_bn1_upd, - emas1_upd, emas2_upd, 
-                              emas3_upd, emas4_upd)
-}
-
 resnet_forward = function(matrix[double] X, int Hin, int Win,
                           string block_type, list[unknown] layer_sizes,
                           list[unknown] model, string mode,
diff --git a/scripts/nn/networks/resnet101.dml b/scripts/nn/networks/resnet101.dml
index e91905b0c0e..28679e8eca9 100644
--- a/scripts/nn/networks/resnet101.dml
+++ b/scripts/nn/networks/resnet101.dml
@@ -28,7 +28,8 @@ forward = function(matrix[double] X, int Hin, int Win,
     /*
      * Forward pass of the ResNet101 as introduced in
      * "Deep Residual Learning for Image Recognition" by
-     * Kaiming He et al. and inspired by the PyTorch
+     * Kaiming He et al., refined in "ResNet v1.5 for
+     * PyTorch" by NVIDIA and inspired by the PyTorch
      * implementation.
      *
      * Inputs:
diff --git a/scripts/nn/networks/resnet152.dml b/scripts/nn/networks/resnet152.dml
index 20f0be86240..9ef9d28bb00 100644
--- a/scripts/nn/networks/resnet152.dml
+++ b/scripts/nn/networks/resnet152.dml
@@ -26,9 +26,10 @@ forward = function(matrix[double] X, int Hin, int Win,
                    list[unknown] ema_means_vars)
     return (matrix[double] out, list[unknown] ema_means_vars_upd) {
     /*
-     * Forward pass of the ResNet50 as introduced in
+     * Forward pass of the ResNet152 as introduced in
      * "Deep Residual Learning for Image Recognition" by
-     * Kaiming He et al. and inspired by the PyTorch
+     * Kaiming He et al., refined in "ResNet v1.5 for
+     * PyTorch" by NVIDIA and inspired by the PyTorch
      * implementation.
      *
      * Inputs:
diff --git a/scripts/nn/networks/resnet50.dml b/scripts/nn/networks/resnet50.dml
index b47e4479b9d..d11c798156f 100644
--- a/scripts/nn/networks/resnet50.dml
+++ b/scripts/nn/networks/resnet50.dml
@@ -26,9 +26,9 @@ forward = function(matrix[double] X, int Hin, int Win,
                    list[unknown] ema_means_vars)
     return (matrix[double] out, list[unknown] ema_means_vars_upd) {
     /*
      * Forward pass of the ResNet50 as introduced in
      * "Deep Residual Learning for Image Recognition" by
-     * Kaiming He et al. and inspired by the PyTorch
+     * Kaiming He et al., refined in "ResNet v1.5 for
+     * PyTorch" by NVIDIA and inspired by the PyTorch
      * implementation.
      *
      * Inputs: