Sync arch refactor (#201)
* Fix coaster UI tests (rustc error messages changed in 1.62) (#172)

* Fix Linear layer bias gradient computation; add size checks to CUDA functions (#170)

* Assert the correct tensor sizes in copy() and gemm(); fix related Linear logic

* Check output matrix dims in GEMM; fix corresponding Linear layer logic

* Update coaster-blas/src/frameworks/cuda/helper.rs

* Fix merge mistake in commit 6952a49 (#173)

* doc: clarify remote test (#175)

* bump rust-bindgen to 0.60.1, bump cargo lock file (#174)

* build(deps): bump capnp from 0.14.9 to 0.14.11 (#179)

Bumps [capnp](https://github.com/capnproto/capnproto-rust) from 0.14.9 to 0.14.11.
- [Release notes](https://github.com/capnproto/capnproto-rust/releases)
- [Commits](capnproto/capnproto-rust@capnp-v0.14.9...capnp-v0.14.11)

---
updated-dependencies:
- dependency-name: capnp
  dependency-type: direct:production
...

* build(deps): bump tokio from 1.21.0 to 1.23.1 (#183)

Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.21.0 to 1.23.1.
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](tokio-rs/tokio@tokio-1.21.0...tokio-1.23.1)

---
updated-dependencies:
- dependency-name: tokio
  dependency-type: direct:production
...

* build(deps): bump bumpalo from 3.11.0 to 3.12.0 (#187)

Bumps [bumpalo](https://github.com/fitzgen/bumpalo) from 3.11.0 to 3.12.0.
- [Release notes](https://github.com/fitzgen/bumpalo/releases)
- [Changelog](https://github.com/fitzgen/bumpalo/blob/main/CHANGELOG.md)
- [Commits](fitzgen/bumpalo@3.11.0...3.12.0)

---
updated-dependencies:
- dependency-name: bumpalo
  dependency-type: indirect
...

* build(deps): bump tokio from 1.23.1 to 1.24.2 (#191)

Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.23.1 to 1.24.2.
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/commits)

---
updated-dependencies:
- dependency-name: tokio
  dependency-type: direct:production
...

* Now also saves bias layers (#193)

* build(deps): bump openssl from 0.10.41 to 0.10.48

Bumps [openssl](https://github.com/sfackler/rust-openssl) from 0.10.41 to 0.10.48.
- [Release notes](https://github.com/sfackler/rust-openssl/releases)
- [Commits](sfackler/rust-openssl@openssl-v0.10.41...openssl-v0.10.48)

updated-dependencies:
- dependency-name: openssl
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <[email protected]>

* Do not pass batch_size to cudnnGetRNNParamsSize().

* Add a feature for deterministic (pseudo)randomizing.

* New network architecture pieces: Layer, Descriptor, Context, Network (#165)

* New network architecture pieces: Layer, Descriptor, Context, Network

* Update juice/src/net/descriptor.rs

* Implement Sequential layer for the new architecture (#168)

* Implement Sequential layer

* Fix coaster UI tests (rustc error messages changed in 1.62) (#172)

* Fix Linear layer bias gradient computation; add size checks to CUDA functions (#170)

* Assert the correct tensor sizes in copy() and gemm(); fix related Linear logic

* Check output matrix dims in GEMM; fix corresponding Linear layer logic

* Update coaster-blas/src/frameworks/cuda/helper.rs

* More ergonomic net creation and fallible Sequential constructor

* Fix merge mistake in commit 6952a49

* Add a few more layers to the new architecture (#176)

* Add trainer subsystem with SGD and Adam optimizers (#177)

* Coaster convolution API cleanup (#178)

* Move Convolution workspace into context

* Implement Convolution, Dropout and Pooling layers (#180)

* Move Convolution workspace into context

* Formatting fixes

* Fixed unit tests

* Partial implementation of the Convolution layer

* Implement the remaining parts for Convolution layer

* Implement dropout and pooling layers

* Fix CUDA tensor descriptor size error and adjust layer testing infra

* Extended debug output for layers with custom Debug impl

* Add softmax layers and convert MNIST example (#184)

* Move Convolution workspace into context

* Formatting fixes

* Fixed unit tests

* Partial implementation of the Convolution layer

* Implement the remaining parts for Convolution layer

* Implement dropout and pooling layers

* Fix CUDA tensor descriptor size error and adjust layer testing infra

* Extended debug output for layers with custom Debug impl

* Changed mnist example to the new architecture

* Plumbed the momentum arg in the mnist example

* Implemented softmax and logsoftmax layers

* Remove unnecessary NLL parameter and fix mnist example

* Fix native backend softmax and logsoftmax grad computation

* Changed slicing syntax in native backend softmax functions

* Convert juice benchtests to Criterion (#192)

* Convert Juice benchmarks to Criterion

* Add newline at the end of Cargo.toml

* Made Layer operations return a Result (#186)

* Made Layer operations return a Result

* Change LayerError to contain Boxes

* Update benchmarks for new layer API

* Simplify new_rnn_config()

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: Mikhail Balakhno <{ID}+{username}@users.noreply.github.com>
Co-authored-by: Bernhard Schuster <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: opfromthestart <[email protected]>
5 people authored Mar 15, 2024
1 parent c772680 commit 1eeb939
Showing 20 changed files with 12,816 additions and 18,599 deletions.
930 changes: 483 additions & 447 deletions Cargo.lock

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions coaster-nn/src/frameworks/cuda/mod.rs
@@ -831,14 +831,17 @@ where
     fn generate_rnn_weight_description(
         &self,
         rnn_config: &Self::CRNN,
-        batch_size: i32,
         input_size: i32,
     ) -> Result<Vec<usize>, Error> {
         let cudnn_framework = self.framework().cudnn();
         let data_type = <T as DataTypeInfo>::cudnn_data_type();
 
-        // MiniBatch, LayerSize, 1
-        let dim_x = vec![batch_size, input_size, 1];
+        // According to the cuDNN API reference and examples, xDesc should have at
+        // least 3 dimensions, with batch_size being the first. However, the weights
+        // size does not depend on the batch size, and we'd like to avoid having to
+        // specify the batch size in advance (as it can change during execution).
+        // So we use batch_size = 1, as it appears to work well.
+        let dim_x = vec![1, input_size, 1];
         let stride_x = vec![dim_x[2] * dim_x[1], dim_x[2], 1];
 
         // dummy desc to get the param size
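For intuition, here is a minimal standalone sketch (mine, not part of the diff) of the packed-stride rule used above; with the placeholder `dim_x = [1, input_size, 1]` described in the comment, the descriptor passed to the weight-size query no longer involves the real batch size:

```rust
/// Fully-packed (row-major) strides for a 3-D tensor, mirroring the
/// `stride_x` computation in the hunk above. Illustrative only.
fn packed_strides(dims: [i32; 3]) -> [i32; 3] {
    [dims[2] * dims[1], dims[2], 1]
}

fn main() {
    let input_size = 8;
    // The weight size does not depend on the batch size, so 1 is a safe placeholder.
    let dim_x = [1, input_size, 1];
    assert_eq!(packed_strides(dim_x), [8, 1, 1]);
}
```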
1 change: 0 additions & 1 deletion coaster-nn/src/frameworks/native/mod.rs
@@ -887,7 +887,6 @@ where
     fn generate_rnn_weight_description(
         &self,
         rnn_config: &Self::CRNN,
-        batch_size: i32,
         input_size: i32,
     ) -> Result<Vec<usize>, Error> {
         // This will end up being the tensor descriptor for the weights associated with the RNN pass
1 change: 0 additions & 1 deletion coaster-nn/src/plugin.rs
@@ -329,7 +329,6 @@ pub trait Rnn<F>: NN<F> {
     fn generate_rnn_weight_description(
         &self,
         rnn_config: &Self::CRNN,
-        batch_size: i32,
         input_size: i32,
    ) -> Result<Vec<usize>, crate::co::error::Error>;
 
2 changes: 1 addition & 1 deletion coaster-nn/src/tests/rnn.rs
@@ -55,7 +55,7 @@ where
         .unwrap();
 
     let filter_dimensions = backend
-        .generate_rnn_weight_description(&rnn_config, BATCH_SIZE as i32, INPUT_SIZE as i32)
+        .generate_rnn_weight_description(&rnn_config, INPUT_SIZE as i32)
         .unwrap();
 
     let w = uniformly_random_tensor::<T, F>(
9 changes: 5 additions & 4 deletions juice-examples/mackey-glass-rnn-regression/README.md
@@ -39,13 +39,14 @@
 A generated version of Mackey-Glass is packaged with Juice, and packaged in a way suitable for RNN networks.
 
 ```bash
+cd juice-examples/mackey-glass-rnn-regression
 # Train a RNN Network (*nix)
-./target/release/example-rnn-regression train --file=SavedRNNNetwork.juice --learningRate=0.01 --batchSize=40
+../../target/release/example-rnn-regression train --learning-rate=0.01 --batch-size=40 SavedRNNNetwork.juice
 # Train a RNN Network (Windows)
-.\target\release\example-rnn-regression.exe train --file=SavedRNNNetwork.juice --learningRate=0.01 --batchSize=40
+..\..\target\release\example-rnn-regression.exe train --learning-rate=0.01 --batch-size=40 SavedRNNNetwork.juice
 
 # Test the RNN Network (*nix)
-../target/release/example-rnn-regression test --file=SavedRNNNetwork.juice
+../../target/release/example-rnn-regression test --batch-size=40 SavedRNNNetwork.juice
 # Test the RNN Network (Windows)
-cd ../target/release/ && example-rnn-regression.exe test --file=SavedRNNNetwork.juice
+..\..\target\release\example-rnn-regression.exe test --batch-size=40 SavedRNNNetwork.juice
 ```
Binary file not shown.
@@ -18,7 +18,7 @@ juice-utils = {path = "../juice-utils"}
 
 csv = "1"
 hyper = "0.14"
-hyper-rustls = "0.22"
+hyper-rustls = "0.23"
 futures = "0.3"
 futures-util = "0.3"
 
2 changes: 2 additions & 0 deletions juice/Cargo.toml
@@ -42,6 +42,8 @@ default = ["native", "cuda"]
 native = ["coaster-blas/native", "coaster-nn/native"]
 cuda = ["coaster/cuda", "coaster-blas/cuda", "coaster-nn/cuda"]
 opencl = ["coaster/opencl", "coaster-blas/opencl", "coaster-nn/opencl"]
+# When enabled, all weights are initialized in a deterministic way.
+deterministic = []
 
 [[bench]]
 name = "network_benches"
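Since `deterministic` is an additive Cargo feature with no extra dependencies, consumers can opt in with `cargo build --features deterministic` (or `cargo test --features deterministic`); the seeded-RNG paths it gates appear in `juice/src/layers/common/linear.rs` and `juice/src/weight.rs` below.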
10 changes: 10 additions & 0 deletions juice/src/layer.rs
@@ -306,7 +306,15 @@ impl<B: IBackend> Layer<B> {
         } else {
             format!("{}-{}", self.name, weight_id)
         };
+
+        let display_name_bias = if !weight_name.is_empty() {
+            format!("{weight_name}-bias")
+        } else {
+            format!("{}-{}-bias", self.name, weight_id)
+        };
+
         self.weights_display_names.push(display_name.clone());
+        self.weights_display_names.push(display_name_bias.clone());
         // create name for registry
         let registry_name = format!("SHARED_WEIGHT_{}", display_name);
@@ -925,6 +933,8 @@ impl<'a, B: IBackend> CapnpWrite<'a> for Layer<B> {
         let names = self.learnable_weights_names();
         let weights_data = self.learnable_weights_data();
 
+        assert_eq!(names.len(), weights_data.len(), "All layers must be named");
+
         for (i, (name, weight)) in names.iter().zip(weights_data).enumerate() {
             let mut capnp_weight = weights.reborrow().get(i as u32);
             capnp_weight.set_name(name);
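These `-bias` display names, together with the length assertion in the Cap'n Proto writer, keep weight names and weight tensors paired up during serialization; this appears to be the mechanism behind "Now also saves bias layers (#193)" from the commit message.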
11 changes: 10 additions & 1 deletion juice/src/layers/common/linear.rs
@@ -19,6 +19,8 @@
 //! In the context of convolutional neural networks this layer is also
 //! called a "fully-connected layer" if it is used at the end of the network.
+use rand::distributions::Distribution;
+
 use crate::capnp_util::*;
 use crate::co::backend::IBackend;
 use crate::co::tensor::SharedTensor;
@@ -27,6 +29,7 @@ use crate::juice_capnp::linear_config as capnp_config;
 use crate::layer::*;
 use crate::util::{native_scalar, ArcLock, LayerOps};
 use crate::weight::FillerType;
+use rand::{self, prelude::*};
 
 #[derive(Debug)]
 /// Linear Layer
@@ -112,7 +115,13 @@ impl<B: IBackend + LayerOps<f32>> ILayer<B> for Linear {
         weight.write().unwrap().resize(&(1, self.output_size)).unwrap();
         // Weight Initialisation for bias is typically a constant, and a suitable initialisation
         // is stated in https://cs231n.github.io/neural-networks-2/#init for non-LSTM types.
-        let initialisation_constant = rand::random::<f32>();
+
+        #[cfg(feature = "deterministic")]
+        let mut rng = rand::rngs::StdRng::seed_from_u64(2301); // Arbitrary seed.
+        #[cfg(not(feature = "deterministic"))]
+        let mut rng = thread_rng();
+
+        let initialisation_constant: f32 = rand::distributions::Standard {}.sample(&mut rng);
         let filler = FillerType::Constant {
             value: initialisation_constant * (2.0 / initialisation_constant).sqrt(),
         };
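Aside (not in the diff): `initialisation_constant * (2.0 / initialisation_constant).sqrt()` simplifies to `sqrt(2 * c)` for `c > 0`, and since `Standard` samples an `f32` uniformly from [0, 1), the bias constant effectively lands in (0, sqrt(2)).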
3 changes: 1 addition & 2 deletions juice/src/layers/common/rnn.rs
@@ -137,7 +137,7 @@ impl<B: IBackend + conn::Rnn<f32>> ILayer<B> for Rnn<B> {
             .unwrap();
 
         let filter_dimensions: TensorDesc = backend
-            .generate_rnn_weight_description(&config, batch_size as i32, input_size as i32)
+            .generate_rnn_weight_description(&config, input_size as i32)
             .unwrap();
 
         // weights
@@ -492,7 +492,6 @@ mod tests {
         let filter_dimensions = <Backend<Cuda> as conn::Rnn<f32>>::generate_rnn_weight_description(
             &backend,
             &config,
-            BATCH_SIZE as i32,
             INPUT_SIZE as i32,
         )
         .unwrap();
4 changes: 4 additions & 0 deletions juice/src/weight.rs
@@ -200,7 +200,11 @@ impl FillerType {
         let native_weight = weight.write_only(native.device()).unwrap();
         let init_range = (6.0f32 / (num_inputs as f32 + num_outputs as f32)).sqrt();
 
+        #[cfg(feature = "deterministic")]
+        let mut rng = rand::rngs::StdRng::seed_from_u64(2301); // Arbitrary seed.
+        #[cfg(not(feature = "deterministic"))]
         let mut rng = thread_rng();
+
         let between = rand::distributions::Uniform::from(-init_range..=init_range);
         for e in native_weight.as_mut_slice::<f32>() {
             *e = between.sample(&mut rng);
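The same gated-RNG pattern as a self-contained sketch (assuming rand 0.8; the function name and standalone framing are mine, not the diff's):

```rust
use rand::distributions::{Distribution, Uniform};
use rand::{rngs::StdRng, thread_rng, SeedableRng};

/// Glorot/Xavier-uniform fill: samples from [-sqrt(6/(fan_in+fan_out)), +sqrt(...)],
/// using a fixed-seed RNG when the `deterministic` feature is enabled.
fn glorot_uniform_fill(weights: &mut [f32], num_inputs: usize, num_outputs: usize) {
    let init_range = (6.0f32 / (num_inputs as f32 + num_outputs as f32)).sqrt();

    #[cfg(feature = "deterministic")]
    let mut rng = StdRng::seed_from_u64(2301); // Fixed, arbitrary seed.
    #[cfg(not(feature = "deterministic"))]
    let mut rng = thread_rng();

    let between = Uniform::from(-init_range..=init_range);
    for e in weights.iter_mut() {
        *e = between.sample(&mut rng);
    }
}
```

With `deterministic` enabled, repeated runs produce identical weights, matching the feature's description in `juice/Cargo.toml` above.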
2 changes: 1 addition & 1 deletion rcublas/cublas-sys/Cargo.toml
@@ -19,7 +19,7 @@ libc = "0.2"
 
 [build-dependencies]
 pkg-config = "0.3"
-bindgen = { version = "^0.59.1", optional = true }
+bindgen = { version = "^0.60.1", optional = true }
 
 [features]
 default = []