Add softmax layers and convert MNIST example (#184)
* Move Convolution workspace into context

* Formatting fixes

* Fixed unit tests

* Partial implementation of the Convolution layer

* Implement the remaining parts for Convolution layer

* Implement dropout and pooling layers

* Fix CUDA tensor descriptor size error and adjust layer testing infra

* Extended debug output for layers with custom Debug impl

* Changed mnist example to the new architecture

* Plumbed the momentum arg in the mnist example

* Implemented softmax and logsoftmax layers

* Remove unnecessary NLL parameter and fix mnist example

* Fix native backend softmax and logsoftmax grad computation

* Changed slicing syntax in native backend softmax functions

Co-authored-by: Mikhail Balakhno <{ID}+{username}@users.noreply.github.com>
hweom and Mikhail Balakhno authored Jan 9, 2023
1 parent c388ebb commit 1a6a820
Showing 10 changed files with 341 additions and 152 deletions.
108 changes: 86 additions & 22 deletions coaster-nn/src/frameworks/native/helper.rs
@@ -307,17 +307,32 @@ macro_rules! impl_ops_softmax_for {
         x: &SharedTensor<$t>,
         result: &mut SharedTensor<$t>,
     ) -> Result<(), Error> {
+        // Input tensor must have at least 2 dimensions.
+        // First dimension is treated as a batch number.
+        assert!(
+            x.desc().size() > 1,
+            "Input tensor for softmax must have at least 2 dimensions, got {:?}",
+            x.desc()
+        );
+
+        let batch_size = x.desc()[0];
+        let item_size = x.desc().iter().skip(1).fold(1, |acc, v| acc * v);
+
         let xs = read!(x, $t, self);
         let rs = write_only!(result, $t, self);

         map1(xs, rs, |v| v.exp())?;

-        let mut sum: $t = 0.0; // iter_arith is not stable yet
-        for r in &*rs {
-            sum += *r;
-        }
-        for r in rs {
-            *r /= sum;
+        for i in 0..batch_size {
+            let batch_item = &mut rs[i * item_size..][..item_size];
+
+            let mut sum: $t = 0.0; // iter_arith is not stable yet
+            for r in &*batch_item {
+                sum += *r;
+            }
+            for r in &mut *batch_item {
+                *r /= sum;
+            }
         }
         Ok(())
     }
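For reference, the new forward pass distilled into a standalone sketch (illustrative only, not part of the commit): the tensor is treated as a flat buffer of batch_size items of item_size values each, and every item is exponentiated and then normalized on its own.

fn softmax_batched(xs: &[f64], batch_size: usize, item_size: usize) -> Vec<f64> {
    assert_eq!(xs.len(), batch_size * item_size);
    // Phase 1: exponentiate everything, mirroring map1(xs, rs, |v| v.exp()).
    let mut rs: Vec<f64> = xs.iter().map(|v| v.exp()).collect();
    // Phase 2: normalize each batch item independently.
    for i in 0..batch_size {
        let item = &mut rs[i * item_size..][..item_size];
        let sum: f64 = item.iter().sum();
        for r in item {
            *r /= sum;
        }
    }
    rs
}

fn main() {
    // Two batch items of three elements each; each item sums to 1.0.
    let out = softmax_batched(&[1.0, 2.0, 3.0, 1.0, 1.0, 1.0], 2, 3);
    println!("{:?}", out);
}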
@@ -329,16 +344,32 @@
         x_diff: &SharedTensor<$t>,
         result_diff: &mut SharedTensor<$t>,
     ) -> Result<(), Error> {
+        let batch_size = x.desc()[0];
+        let item_size = x.desc().iter().skip(1).fold(1, |acc, v| acc * v);
+
         let xs = read!(x, $t, self);
         let dxs = read!(x_diff, $t, self);
         let drs = write_only!(result_diff, $t, self);

-        let mut dot: $t = 0.0;
-        for (t, dt) in xs.iter().zip(dxs.iter()) {
-            dot += t * dt;
+        for i in 0..batch_size {
+            let batch_item_in = &xs[i * item_size..][..item_size];
+            let batch_item_diff_in = &dxs[i * item_size..][..item_size];
+            let batch_item_out = &mut drs[i * item_size..][..item_size];
+
+            let mut dot: $t = 0.0;
+            for (t, dt) in batch_item_in.iter().zip(batch_item_diff_in.iter()) {
+                dot += t * dt;
+            }
+
+            map2(
+                batch_item_in,
+                batch_item_diff_in,
+                batch_item_out,
+                |t, dt| t * (dt - dot),
+            )?;
         }

-        map2(xs, dxs, drs, |t, dt| t * (dt - dot))
+        Ok(())
     }
 }
 };
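The backward pass implements, per batch item, the standard softmax Jacobian-vector product: with y the forward output (which is what `x` must hold here for the formula to be valid, following the usual backward convention) and dy the upstream gradient, dx_i = y_i * (dy_i - sum_j y_j * dy_j). A minimal single-item sketch, purely for illustration:

fn softmax_grad_item(y: &[f64], dy: &[f64]) -> Vec<f64> {
    // dot = sum_j y_j * dy_j, matching `dot` in the diff above.
    let dot: f64 = y.iter().zip(dy).map(|(t, dt)| t * dt).sum();
    y.iter().zip(dy).map(|(t, dt)| t * (dt - dot)).collect()
}

fn main() {
    let y = [0.09003057, 0.24472847, 0.66524096]; // softmax of [1, 2, 3]
    let dx = softmax_grad_item(&y, &[1.0, 0.0, 0.0]);
    // Entries sum to ~0: softmax outputs always sum to 1, so the gradient
    // lies in the plane orthogonal to (1, 1, ..., 1).
    println!("{:?} sum={}", dx, dx.iter().sum::<f64>());
}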
@@ -354,20 +385,37 @@ macro_rules! impl_ops_log_softmax_for {
         x: &SharedTensor<$t>,
         result: &mut SharedTensor<$t>,
     ) -> Result<(), $crate::co::error::Error> {
+        // Input tensor must have at least 2 dimensions.
+        // First dimension is treated as a batch number.
+        assert!(
+            x.desc().size() > 1,
+            "Input tensor for softmax must have at least 2 dimensions, got {:?}",
+            x.desc()
+        );
+
+        let batch_size = x.desc()[0];
+        let item_size = x.desc().iter().skip(1).fold(1, |acc, v| acc * v);
+
         let xs = read!(x, $t, self);
         let rs = write_only!(result, $t, self);

-        let max_x = xs
-            .iter()
-            .fold(::std::$t::NEG_INFINITY, |acc, &t| acc.max(t));
+        for i in 0..batch_size {
+            let batch_item_in = &xs[i * item_size..][..item_size];
+            let batch_item_out = &mut rs[i * item_size..][..item_size];
+            let max_x = batch_item_in
+                .iter()
+                .fold(::std::$t::NEG_INFINITY, |acc, &t| acc.max(t));

-        let mut logsum: $t = 0.0;
-        for t in xs {
-            logsum += (-(max_x - t)).exp();
+            let mut logsum: $t = 0.0;
+            for t in batch_item_in {
+                logsum += (*t - max_x).exp();
+            }
+            logsum = max_x + logsum.ln();
+
+            map1(batch_item_in, batch_item_out, |t| t - logsum)?;
         }
-        logsum = max_x + logsum.ln();
-
-        map1(xs, rs, |t| t - logsum)
+        Ok(())
     }

     fn log_softmax_grad(
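The forward pass applies the log-sum-exp trick per batch item: logsum = max(x) + ln(sum_i exp(x_i - max(x))), so exp() never overflows, and the result is x_i - logsum. (The rewrite from `(-(max_x - t)).exp()` to `(*t - max_x).exp()` is algebraically identical.) A standalone per-item sketch, assuming f64 inputs:

fn log_softmax_item(xs: &[f64]) -> Vec<f64> {
    // Shift by the max so exp() stays in range; the shift cancels below.
    let max_x = xs.iter().fold(f64::NEG_INFINITY, |acc, &t| acc.max(t));
    let logsum = max_x + xs.iter().map(|t| (t - max_x).exp()).sum::<f64>().ln();
    xs.iter().map(|t| t - logsum).collect()
}

fn main() {
    let out = log_softmax_item(&[1.0, 2.0, 3.0]);
    // exp() of each entry recovers the plain softmax probabilities.
    println!("{:?}", out.iter().map(|v| v.exp()).collect::<Vec<_>>());
}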
@@ -376,15 +424,31 @@
         &self,
         x: &SharedTensor<$t>,
         x_diff: &SharedTensor<$t>,
         result_diff: &mut SharedTensor<$t>,
     ) -> Result<(), $crate::co::error::Error> {
+        let batch_size = x.desc()[0];
+        let item_size = x.desc().iter().skip(1).fold(1, |acc, v| acc * v);
+
         let xs = read!(x, $t, self);
         let dxs = read!(x_diff, $t, self);
         let drs = write_only!(result_diff, $t, self);

-        let mut sum: $t = 0.0;
-        for &grad_val in dxs.iter() {
-            sum += grad_val;
+        for i in 0..batch_size {
+            let batch_item_in = &xs[i * item_size..][..item_size];
+            let batch_item_diff_in = &dxs[i * item_size..][..item_size];
+            let batch_item_out = &mut drs[i * item_size..][..item_size];
+
+            let mut sum: $t = 0.0;
+            for &grad_val in batch_item_diff_in.iter() {
+                sum += grad_val;
+            }
+            map2(
+                batch_item_in,
+                batch_item_diff_in,
+                batch_item_out,
+                |t, dt| dt - t.exp() * sum,
+            )?;
         }
-        map2(xs, dxs, drs, |t, dt| dt - t.exp() * sum)
+
+        Ok(())
     }
 }
 };
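Per batch item this computes dx_i = dy_i - exp(x_i) * sum_j dy_j, where `x` holds the log-softmax output (the use of t.exp() implies exp(x) are the softmax probabilities) and dy is the upstream gradient. An illustrative single-item sketch; the NLL-style dy below is an assumption for the example, not taken from the commit:

fn log_softmax_grad_item(x: &[f64], dy: &[f64]) -> Vec<f64> {
    // sum = sum_j dy_j; for an NLL loss with a one-hot target this is -1.
    let sum: f64 = dy.iter().sum();
    x.iter().zip(dy).map(|(t, dt)| dt - t.exp() * sum).collect()
}

fn main() {
    // x = log_softmax([1, 2, 3]); dy = -one_hot(class 2), as an
    // (unaveraged) NLL loss might emit.
    let x = [-2.40760596, -1.40760596, -0.40760596];
    let dy = [0.0, 0.0, -1.0];
    // Result equals softmax(x) - one_hot: the classic cross-entropy gradient.
    println!("{:?}", log_softmax_grad_item(&x, &dy));
}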
8 changes: 7 additions & 1 deletion coaster-nn/src/plugin.rs
@@ -621,7 +621,10 @@ pub trait Convolution<F>: NN<F> {
 /// Provides the functionality for a Backend to support Softmax operations.
 pub trait Softmax<F>: NN<F> {
     /// Computes a [Softmax][softmax] over the input Tensor `x`.
-    /// [softmax]: https://en.wikipedia.org/wiki/Softmax_function
+    /// [softmax]: https://en.wikipedia.org/wiki/Softmax_function
+    /// The tensor must have more than one dimension: N,D1,..., where the first
+    /// dimension N is interpreted as the batch size. The softmax operation is
+    /// applied independently to each batch item over D1,... .
     ///
     /// Saves the result to `result`.
     fn softmax(
@@ -645,6 +648,9 @@ pub trait Softmax<F>: NN<F> {
 /// Provides the functionality for a Backend to support LogSoftmax operations.
 pub trait LogSoftmax<F>: NN<F> {
     /// Computes a logarithmic softmax over the input Tensor `x`.
+    /// The tensor must have more than one dimension: N,D1,..., where the first
+    /// dimension N is interpreted as the batch size. The LogSoftmax operation is
+    /// applied independently to each batch item over D1,... .
     ///
     /// Saves the result to `result`.
     fn log_softmax(
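The shape contract documented here can be made concrete with a small helper (hypothetical, not part of the crate) that mirrors how the native backend computes batch_size and item_size:

// Hypothetical helper, for illustration only: splits a tensor shape into
// the batch layout the softmax docs above describe.
fn batch_layout(desc: &[usize]) -> (usize, usize) {
    assert!(desc.len() >= 2, "softmax input needs at least 2 dimensions");
    let item_size: usize = desc.iter().skip(1).product();
    (desc[0], item_size)
}

fn main() {
    // A batch of 32 ten-way distributions.
    assert_eq!(batch_layout(&[32, 10]), (32, 10));
    // Trailing dimensions are flattened: one batch item of 4 * 1 * 3 = 12
    // values -- the shape the updated softmax tests below use.
    assert_eq!(batch_layout(&[1, 4, 1, 3]), (1, 12));
}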
2 changes: 1 addition & 1 deletion coaster-nn/src/tests/softmax.rs
@@ -6,7 +6,7 @@ use crate::co::prelude::*;
 use crate::plugin::{LogSoftmax, Softmax};
 use crate::tests::{filled_tensor, tensor_assert_eq, tensor_assert_eq_tensor, Epsilon};

-const DIMS: [usize; 3] = [4, 1, 3];
+const DIMS: [usize; 4] = [1, 4, 1, 3];

 const IN: [f64; 12] = [
     -0.3768541784373798341,