From ffcf59266b6e91d7ada8af332465f973ae9b345f Mon Sep 17 00:00:00 2001
From: Bernhard Schuster
Date: Wed, 31 Mar 2021 18:50:30 +0200
Subject: [PATCH 1/2] refactor/error: use thiserror to improve error handling,
 unknown must carry error code

---
 Cargo.lock                                    |  23 ++
 coaster/Cargo.toml                            |   3 +-
 .../src/frameworks/cuda/api/driver/context.rs |   8 +-
 .../src/frameworks/cuda/api/driver/device.rs  |  14 +-
 .../src/frameworks/cuda/api/driver/error.rs   | 251 +++++-------------
 .../src/frameworks/cuda/api/driver/memory.rs  |  10 +-
 .../src/frameworks/cuda/api/driver/utils.rs   |   3 +-
 juice-examples/juice-utils/Cargo.toml         |   1 +
 .../mackey-glass-rnn-regression/Cargo.toml    |   1 +
 .../Cargo.toml                                |   1 +
 rcublas/cublas/Cargo.toml                     |   1 +
 rcublas/cublas/src/api/level1.rs              |  22 +-
 rcublas/cublas/src/api/level3.rs              |   3 +-
 rcublas/cublas/src/api/util.rs                |  21 +-
 rcublas/cublas/src/error.rs                   |  71 +----
 rcudnn/cudnn/Cargo.toml                       |   1 +
 rcudnn/cudnn/src/api/activation.rs            |  18 +-
 rcudnn/cudnn/src/api/convolution.rs           |  53 ++--
 rcudnn/cudnn/src/api/cuda.rs                  |  11 +-
 rcudnn/cudnn/src/api/dropout.rs               |  26 +-
 rcudnn/cudnn/src/api/normalization.rs         |  24 +-
 rcudnn/cudnn/src/api/pooling.rs               |  33 +--
 rcudnn/cudnn/src/api/rnn.rs                   |  43 +--
 rcudnn/cudnn/src/api/softmax.rs               |   6 +-
 rcudnn/cudnn/src/api/tensor.rs                |  28 +-
 rcudnn/cudnn/src/api/utils.rs                 |   7 +-
 rcudnn/cudnn/src/error.rs                     |  71 ++---
 27 files changed, 316 insertions(+), 438 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 1f9c2b0bb..c4af51a02 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -258,6 +258,7 @@ dependencies = [
  "rcublas",
  "rcudnn",
  "regex",
+ "thiserror",
 ]
 
 [[package]]
@@ -1790,6 +1791,7 @@ dependencies = [
  "libc",
  "log",
  "rcublas-sys",
+ "thiserror",
 ]
 
 [[package]]
@@ -1809,6 +1811,7 @@ dependencies = [
  "libc",
  "num 0.1.42",
  "rcudnn-sys",
+ "thiserror",
 ]
 
 [[package]]
@@ -2273,6 +2276,26 @@ dependencies = [
  "unicode-width",
 ]
 
+[[package]]
+name = "thiserror"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e0f4a65597094d4483ddaed134f409b2cb7c1beccf25201a9f73c719254fa98e"
+dependencies = [
+ "thiserror-impl",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "thread_local"
 version = "1.0.1"
diff --git a/coaster/Cargo.toml b/coaster/Cargo.toml
index 87119e817..f4d41ba11 100644
--- a/coaster/Cargo.toml
+++ b/coaster/Cargo.toml
@@ -23,7 +23,8 @@ enum_primitive = "0.1"
 byteorder = "1"
 num = "0.2"
 lazy_static = "1"
-regex = "1"
+regex = "1.2"
+thiserror = "1.0"
 
 rcudnn = { version = "1.7", path = "../rcudnn/cudnn", optional = true }
 rcublas = { version = "0.5", path = "../rcublas/cublas", optional = true }
diff --git a/coaster/src/frameworks/cuda/api/driver/context.rs b/coaster/src/frameworks/cuda/api/driver/context.rs
index f793ac3e3..d8ce00f80 100644
--- a/coaster/src/frameworks/cuda/api/driver/context.rs
+++ b/coaster/src/frameworks/cuda/api/driver/context.rs
@@ -45,8 +45,8 @@ impl API {
             CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidValue("Invalid value for `device` provided.")),
             CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
             CUresult::CUDA_ERROR_OUT_OF_MEMORY => Err(Error::OutOfMemory("Device is out of memory.")),
-            CUresult::CUDA_ERROR_UNKNOWN => Err(Error::Unknown("An unknown Error occured. Check the CUDA DRIVER API manual for more details.")),
-            _ => Err(Error::Unknown("Unable to create Cuda context.")),
+            status @ CUresult::CUDA_ERROR_UNKNOWN => Err(Error::Unknown("An unknown error occurred. Check the CUDA DRIVER API manual for more details.", status as i32 as u64)),
+            status => Err(Error::Unknown("Unable to create Cuda context.", status as i32 as u64)),
         }
     }
 
@@ -59,7 +59,7 @@ impl API {
             CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
             CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
             CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
-            _ => Err(Error::Unknown("Unable to destroy Cuda context.")),
+            status => Err(Error::Unknown("Unable to destroy Cuda context.", status as i32 as u64)),
         }
     }
 
@@ -70,7 +70,7 @@ impl API {
             CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
             CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
             CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
-            _ => Err(Error::Unknown("Unable to synchronize CUDA context.")),
+            status => Err(Error::Unknown("Unable to synchronize CUDA context.", status as i32 as u64)),
         }
     }
 }
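[Note: the arms above show the pattern applied throughout this patch: the catch-all `_` arm becomes a `status` binding (or `status @ ...` when a variant is still matched explicitly), so the raw `CUresult` travels inside `Error::Unknown` instead of being discarded. A minimal self-contained sketch of the idea, outside the patch; the toy `CUresult` values below are illustrative assumptions, not the real bindings:

    // Sketch only: mirrors the patch's `status as i32 as u64` conversion.
    // `CUresult` is a fieldless FFI enum, so `as i32` recovers its numeric
    // discriminant, which is then widened for storage in `Error::Unknown`.
    #[allow(non_camel_case_types)]
    #[derive(Debug)]
    enum CUresult {
        CUDA_SUCCESS = 0,
        CUDA_ERROR_UNKNOWN = 999,
    }

    #[derive(Debug, thiserror::Error)]
    enum Error {
        #[error("{0} (driver status code {1})")]
        Unknown(&'static str, u64),
    }

    fn check(status: CUresult) -> Result<(), Error> {
        match status {
            CUresult::CUDA_SUCCESS => Ok(()),
            status => Err(Error::Unknown("Unable to create Cuda context.", status as i32 as u64)),
        }
    }
]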
diff --git a/coaster/src/frameworks/cuda/api/driver/device.rs b/coaster/src/frameworks/cuda/api/driver/device.rs
index 19ac08dd3..4e8200324 100644
--- a/coaster/src/frameworks/cuda/api/driver/device.rs
+++ b/coaster/src/frameworks/cuda/api/driver/device.rs
@@ -77,7 +77,8 @@ impl API {
             CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
             CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
             CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
-            _ => Err(Error::Unknown("Unable to get Device count.")),
+            status => Err(Error::Unknown("Unable to get Device count.", status as i32 as u64)),
+
         }
     }
 
@@ -90,7 +91,8 @@ impl API {
             CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
             CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
             CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
-            _ => Err(Error::Unknown("Unable to get Device count.")),
+            status => Err(Error::Unknown("Unable to get Device count.", status as i32 as u64)),
+
         }
     }
 
@@ -106,7 +108,8 @@ impl API {
             CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
             CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
             CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidValue("Invalid value for `device` provided.")),
-            _ => Err(Error::Unknown("Unable to get device attribute."))
+            status => Err(Error::Unknown("Unable to get device attribute.", status as i32 as u64)),
+
         }
     }
 
@@ -122,7 +125,8 @@ impl API {
             CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
             CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
             CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidValue("Invalid value for `device` provided.")),
-            _ => Err(Error::Unknown("Unable to get device name."))
+            status => Err(Error::Unknown("Unable to get device name.", status as i32 as u64)),
+
         }
     }
 
@@ -137,7 +141,7 @@ impl API {
             CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
             CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
             CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidValue("Invalid value for `device` provided.")),
-            _ => Err(Error::Unknown("Unable to get total mem of device."))
+            status => Err(Error::Unknown("Unable to get total mem of device.", status as i32 as u64))
         }
     }
 }
diff --git a/coaster/src/frameworks/cuda/api/driver/error.rs b/coaster/src/frameworks/cuda/api/driver/error.rs
index 3bfc3feae..8145c6038 100644
--- a/coaster/src/frameworks/cuda/api/driver/error.rs
+++ b/coaster/src/frameworks/cuda/api/driver/error.rs
@@ -1,250 +1,121 @@
 //! Provides Rust Errors for OpenCL's status.
-use std::{fmt, error};
-
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-/// Defines OpenCL errors.
+#[allow(missing_docs)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, thiserror::Error)]
+/// Cuda driver errors
 pub enum Error {
-    /// Failure with provided value.
+    #[error("{0}")]
     InvalidValue(&'static str),
-    /// Failure with memory allocation.
+    #[error("{0}")]
     OutOfMemory(&'static str),
-    /// Failure with Cuda initialization.
+    #[error("{0}")]
     NotInitialized(&'static str),
-    /// Failure with Cuda initialization.
+    #[error("{0}")]
     Deinitialized(&'static str),
-    /// Failure with Profiler.
+    #[error("{0}")]
     ProfilerDisabled(&'static str),
-    /// Failure with Profiler.
+    #[error("{0}")]
     ProfilerNotInitialized(&'static str),
-    /// Failure with Profiler.
+    #[error("{0}")]
     ProfilerAlreadyStarted(&'static str),
-    /// Failure with Profiler.
+    #[error("{0}")]
     ProfilerAlreadyStopped(&'static str),
-    /// Failure with Cuda devices.
+    #[error("{0}")]
     NoDevice(&'static str),
-    /// Failure with provided Cuda device.
+    #[error("{0}")]
    InvalidDevice(&'static str),
-    /// Failure with provided Cuda image.
+    #[error("{0}")]
     InvalidImage(&'static str),
-    /// Failure with provided Cuda context.
+    #[error("{0}")]
     InvalidContext(&'static str),
-    /// Failure with provided Cuda context.
+    #[error("{0}")]
     ContextAlreadyCurrent(&'static str),
-    /// Failure
+    #[error("{0}")]
     MapFailed(&'static str),
-    /// Failure
+    #[error("{0}")]
     UnmapFailed(&'static str),
-    /// Failure
+    #[error("{0}")]
     ArrayIsMapped(&'static str),
-    /// Failure
+    #[error("{0}")]
     AlreadyMapped(&'static str),
-    /// Failure with binary.
+    #[error("{0}")]
     NoBinaryForGpu(&'static str),
-    /// Failure
+    #[error("{0}")]
     AlreadyAquired(&'static str),
-    /// Failure
+    #[error("{0}")]
     NotMapped(&'static str),
-    /// Failure
+    #[error("{0}")]
     NotMappedAsArray(&'static str),
-    /// Failure
+    #[error("{0}")]
     NotMappedAsPointer(&'static str),
-    /// Failure
+    #[error("{0}")]
     EccUncorrectable(&'static str),
-    /// Failure
+    #[error("{0}")]
     UnsupportedLimit(&'static str),
-    /// Failure with context.
+    #[error("{0}")]
     ContextAlreadyInUse(&'static str),
-    /// Failure
+    #[error("{0}")]
     PeerAccessUnsupported(&'static str),
-    /// Failure with provided PTX.
+    #[error("{0}")]
     InvalidPtx(&'static str),
-    /// Failure
+    #[error("{0}")]
     InvalidGraphicsContent(&'static str),
-    /// Failure
+    #[error("{0}")]
     InvalidSource(&'static str),
-    /// Failure
+    #[error("{0}")]
     FileNotFound(&'static str),
-    /// Failure
+    #[error("{0}")]
     SharedObjectSymbolNotFound(&'static str),
-    /// Failure
+    #[error("{0}")]
     SharedObjectInitFailed(&'static str),
-    /// Failure
+    #[error("{0}")]
     OperatingSystem(&'static str),
-    /// Failure
+    #[error("{0}")]
     InvalidHandle(&'static str),
-    /// Failure
+    #[error("{0}")]
     NotFound(&'static str),
-    /// Failure
+    #[error("{0}")]
     NotReady(&'static str),
-    /// Failure
+    #[error("{0}")]
     IllegalAddress(&'static str),
-    /// Failure
+    #[error("{0}")]
     LaunchOutOfResources(&'static str),
-    /// Failure
+    #[error("{0}")]
     LaunchTimeout(&'static str),
-    /// Failure
+    #[error("{0}")]
     LauncIncompatibleTexturing(&'static str),
-    /// Failure
+    #[error("{0}")]
     PeerAccessAlreadyEnabled(&'static str),
-    /// Failure
+    #[error("{0}")]
     PeerAccessNotEnabled(&'static str),
-    /// Failure
+    #[error("{0}")]
     PrimaryContextActive(&'static str),
-    /// Failure
+    #[error("{0}")]
     ContextIsDestroyed(&'static str),
-    /// Failure
+    #[error("{0}")]
     Assert(&'static str),
-    /// Failure
+    #[error("{0}")]
     TooManyPeers(&'static str),
-    /// Failure
+    #[error("{0}")]
     HostMemoryAlreadyRegistered(&'static str),
-    /// Failure
+    #[error("{0}")]
     HostMemoryNotRegistered(&'static str),
-    /// Failure
+    #[error("{0}")]
     HardwareStackError(&'static str),
-    /// Failure
+    #[error("{0}")]
     IllegalInstruction(&'static str),
-    /// Failure
+    #[error("{0}")]
     MisalignedAddress(&'static str),
-    /// Failure
+    #[error("{0}")]
     InvalidAddressSpace(&'static str),
-    /// Failure
+    #[error("{0}")]
     InvalidPc(&'static str),
-    /// Failure
+    #[error("{0}")]
     LaunchFailed(&'static str),
-    /// Failure
+    #[error("{0}")]
     NotPermitted(&'static str),
-    /// Failure
+    #[error("{0}")]
     NotSupported(&'static str),
-    /// Failure
-    Unknown(&'static str),
-}
-
-impl fmt::Display for Error {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            Error::ProfilerNotInitialized(ref err) => write!(f, "{:?}", err),
-            Error::ProfilerDisabled(ref err) => write!(f, "{:?}", err),
-            Error::Deinitialized(ref err) => write!(f, "{:?}", err),
-            Error::NotInitialized(ref err) => write!(f, "{:?}", err),
-            Error::OutOfMemory(ref err) => write!(f, "{:?}", err),
-            Error::InvalidValue(ref err) => write!(f, "{:?}", err),
-            Error::NoBinaryForGpu(ref err) => write!(f, "{:?}", err),
-            Error::AlreadyMapped(ref err) => write!(f, "{:?}", err),
-            Error::ArrayIsMapped(ref err) => write!(f, "{:?}", err),
-            Error::UnmapFailed(ref err) => write!(f, "{:?}", err),
-            Error::MapFailed(ref err) => write!(f, "{:?}", err),
-            Error::ContextAlreadyCurrent(ref err) => write!(f, "{:?}", err),
-            Error::InvalidContext(ref err) => write!(f, "{:?}", err),
-            Error::InvalidImage(ref err) => write!(f, "{:?}", err),
-            Error::InvalidDevice(ref err) => write!(f, "{:?}", err),
-            Error::NoDevice(ref err) => write!(f, "{:?}", err),
-            Error::ProfilerAlreadyStopped(ref err) => write!(f, "{:?}", err),
-            Error::ProfilerAlreadyStarted(ref err) => write!(f, "{:?}", err),
-            Error::IllegalAddress(ref err) => write!(f, "{:?}", err),
-            Error::NotReady(ref err) => write!(f, "{:?}", err),
-            Error::NotFound(ref err) => write!(f, "{:?}", err),
-            Error::InvalidHandle(ref err) => write!(f, "{:?}", err),
-            Error::OperatingSystem(ref err) => write!(f, "{:?}", err),
-            Error::SharedObjectInitFailed(ref err) => write!(f, "{:?}", err),
-            Error::SharedObjectSymbolNotFound(ref err) =>
write!(f, "{:?}", err),
-            Error::FileNotFound(ref err) => write!(f, "{:?}", err),
-            Error::InvalidSource(ref err) => write!(f, "{:?}", err),
-            Error::InvalidGraphicsContent(ref err) => write!(f, "{:?}", err),
-            Error::InvalidPtx(ref err) => write!(f, "{:?}", err),
-            Error::PeerAccessUnsupported(ref err) => write!(f, "{:?}", err),
-            Error::ContextAlreadyInUse(ref err) => write!(f, "{:?}", err),
-            Error::UnsupportedLimit(ref err) => write!(f, "{:?}", err),
-            Error::EccUncorrectable(ref err) => write!(f, "{:?}", err),
-            Error::NotMappedAsPointer(ref err) => write!(f, "{:?}", err),
-            Error::NotMappedAsArray(ref err) => write!(f, "{:?}", err),
-            Error::NotMapped(ref err) => write!(f, "{:?}", err),
-            Error::AlreadyAquired(ref err) => write!(f, "{:?}", err),
-            Error::Unknown(ref err) => write!(f, "{:?}", err),
-            Error::NotSupported(ref err) => write!(f, "{:?}", err),
-            Error::NotPermitted(ref err) => write!(f, "{:?}", err),
-            Error::LaunchFailed(ref err) => write!(f, "{:?}", err),
-            Error::InvalidPc(ref err) => write!(f, "{:?}", err),
-            Error::InvalidAddressSpace(ref err) => write!(f, "{:?}", err),
-            Error::MisalignedAddress(ref err) => write!(f, "{:?}", err),
-            Error::IllegalInstruction(ref err) => write!(f, "{:?}", err),
-            Error::HardwareStackError(ref err) => write!(f, "{:?}", err),
-            Error::HostMemoryNotRegistered(ref err) => write!(f, "{:?}", err),
-            Error::HostMemoryAlreadyRegistered(ref err) => write!(f, "{:?}", err),
-            Error::TooManyPeers(ref err) => write!(f, "{:?}", err),
-            Error::Assert(ref err) => write!(f, "{:?}", err),
-            Error::ContextIsDestroyed(ref err) => write!(f, "{:?}", err),
-            Error::PrimaryContextActive(ref err) => write!(f, "{:?}", err),
-            Error::PeerAccessNotEnabled(ref err) => write!(f, "{:?}", err),
-            Error::PeerAccessAlreadyEnabled(ref err) => write!(f, "{:?}", err),
-            Error::LauncIncompatibleTexturing(ref err) => write!(f, "{:?}", err),
-            Error::LaunchTimeout(ref err) => write!(f, "{:?}", err),
-            Error::LaunchOutOfResources(ref err) => write!(f, "{:?}", err),
-        }
-    }
-}
-
-impl error::Error for Error {
-    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
-        match *self {
-            Error::ProfilerNotInitialized(_) => None,
-            Error::ProfilerDisabled(_) => None,
-            Error::Deinitialized(_) => None,
-            Error::NotInitialized(_) => None,
-            Error::OutOfMemory(_) => None,
-            Error::InvalidValue(_) => None,
-            Error::NoBinaryForGpu(_) => None,
-            Error::AlreadyMapped(_) => None,
-            Error::ArrayIsMapped(_) => None,
-            Error::UnmapFailed(_) => None,
-            Error::MapFailed(_) => None,
-            Error::ContextAlreadyCurrent(_) => None,
-            Error::InvalidContext(_) => None,
-            Error::InvalidImage(_) => None,
-            Error::InvalidDevice(_) => None,
-            Error::NoDevice(_) => None,
-            Error::ProfilerAlreadyStopped(_) => None,
-            Error::ProfilerAlreadyStarted(_) => None,
-            Error::IllegalAddress(_) => None,
-            Error::NotReady(_) => None,
-            Error::NotFound(_) => None,
-            Error::InvalidHandle(_) => None,
-            Error::OperatingSystem(_) => None,
-            Error::SharedObjectInitFailed(_) => None,
-            Error::SharedObjectSymbolNotFound(_) => None,
-            Error::FileNotFound(_) => None,
-            Error::InvalidSource(_) => None,
-            Error::InvalidGraphicsContent(_) => None,
-            Error::InvalidPtx(_) => None,
-            Error::PeerAccessUnsupported(_) => None,
-            Error::ContextAlreadyInUse(_) => None,
-            Error::UnsupportedLimit(_) => None,
-            Error::EccUncorrectable(_) => None,
-            Error::NotMappedAsPointer(_) => None,
-            Error::NotMappedAsArray(_) => None,
-            Error::NotMapped(_) => None,
-            Error::AlreadyAquired(_) => None,
-            Error::Unknown(_) => None,
-            Error::NotSupported(_) => None,
-            Error::NotPermitted(_) => None,
-            Error::LaunchFailed(_) => None,
-            Error::InvalidPc(_) => None,
-            Error::InvalidAddressSpace(_) => None,
-            Error::MisalignedAddress(_) => None,
-            Error::IllegalInstruction(_) => None,
-            Error::HardwareStackError(_) => None,
-            Error::HostMemoryNotRegistered(_) => None,
-            Error::HostMemoryAlreadyRegistered(_) => None,
-            Error::TooManyPeers(_) => None,
-            Error::Assert(_) => None,
-            Error::ContextIsDestroyed(_) => None,
-            Error::PrimaryContextActive(_) => None,
-            Error::PeerAccessNotEnabled(_) => None,
-            Error::PeerAccessAlreadyEnabled(_) => None,
-            Error::LauncIncompatibleTexturing(_) => None,
-            Error::LaunchTimeout(_) => None,
-            Error::LaunchOutOfResources(_) => None,
-        }
-    }
+    #[error("{0} (error code {1})")]
+    Unknown(&'static str, u64),
 }
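[Note: for context on why roughly 180 lines of hand-written `Display` and `error::Error` impls can simply be deleted above: `#[derive(thiserror::Error)]` generates both from the `#[error("...")]` attributes. A hand expansion for a two-variant slice of the enum — a simplified sketch, not the literal macro output:

    #[derive(Debug)]
    enum Error {
        InvalidValue(&'static str),
        Unknown(&'static str, u64),
    }

    // What the derive produces, in spirit: one Display arm per variant,
    // driven by that variant's #[error("...")] format string.
    impl std::fmt::Display for Error {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            match self {
                Error::InvalidValue(msg) => write!(f, "{}", msg),
                Error::Unknown(msg, code) => write!(f, "{} (error code {})", msg, code),
            }
        }
    }

    // No variant carries #[from]/#[source], so the generated std::error::Error
    // impl is empty and source() defaults to None -- exactly what the deleted
    // hand-written impl returned for every arm.
    impl std::error::Error for Error {}
]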
diff --git a/coaster/src/frameworks/cuda/api/driver/memory.rs b/coaster/src/frameworks/cuda/api/driver/memory.rs
index 915e5c77f..690f34c98 100644
--- a/coaster/src/frameworks/cuda/api/driver/memory.rs
+++ b/coaster/src/frameworks/cuda/api/driver/memory.rs
@@ -39,7 +39,7 @@ impl API {
             CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
             CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
             CUresult::CUDA_ERROR_OUT_OF_MEMORY => Err(Error::OutOfMemory("Device is out of memory.")),
-            _ => Err(Error::Unknown("Unable to allocate memory.")),
+            status => Err(Error::Unknown("Unable to allocate memory.", status as i32 as u64)),
         }
     }
 
@@ -50,7 +50,8 @@ impl API {
             CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
             CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
             CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
-            _ => Err(Error::Unknown("Unable to free memory.")),
+            status => Err(Error::Unknown("Unable to free memory.", status as i32 as u64)),
+
         }
     }
 
@@ -65,7 +66,8 @@ impl API {
             CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
             CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
             CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
-            _ => Err(Error::Unknown("Unable to copy memory from host to device.")),
+            status => Err(Error::Unknown("Unable to copy memory from host to device.", status as i32 as u64)),
+
         }
     }
 
@@ -81,7 +83,7 @@ impl API {
             CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
             CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
             CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
-            _ => { println!("{:?}", status); Err(Error::Unknown("Unable to copy memory from device to host.")) },
+            status => Err(Error::Unknown("Unable to copy memory from device to host.", status as i32 as u64)),
         }
     }
 }
diff --git a/coaster/src/frameworks/cuda/api/driver/utils.rs b/coaster/src/frameworks/cuda/api/driver/utils.rs
index 0b6ab8413..5f539d5b2 100644
--- a/coaster/src/frameworks/cuda/api/driver/utils.rs
+++ b/coaster/src/frameworks/cuda/api/driver/utils.rs
@@ -18,7 +18,8 @@ impl API {
             CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
             CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidDevice("Invalid device.")),
             CUresult::CUDA_ERROR_NO_DEVICE => Err(Error::NoDevice("Unable to find a CUDA device.
Try run `nvidia-smi` on your console.")),
-            _ => Err(Error::Unknown("Unable to initialze the Cuda Driver API.")),
+            status => Err(Error::Unknown("Unable to initialize the Cuda Driver API.", status as i32 as u64)),
+
         }
     }
 }
diff --git a/juice-examples/juice-utils/Cargo.toml b/juice-examples/juice-utils/Cargo.toml
index a49a8345e..21797e84a 100644
--- a/juice-examples/juice-utils/Cargo.toml
+++ b/juice-examples/juice-utils/Cargo.toml
@@ -4,6 +4,7 @@ description = "Utilities for running Juice Examples"
 version = "0.0.1"
 authors = ["Lissa Hyacinth ", "Bernhard Schuster "]
 edition = "2018"
+publish = false
 
 [dependencies]
 reqwest= {version = "0.11.2", features = ["blocking"]}
diff --git a/juice-examples/mackey-glass-rnn-regression/Cargo.toml b/juice-examples/mackey-glass-rnn-regression/Cargo.toml
index 37ef945fb..14f7643de 100644
--- a/juice-examples/mackey-glass-rnn-regression/Cargo.toml
+++ b/juice-examples/mackey-glass-rnn-regression/Cargo.toml
@@ -4,6 +4,7 @@ description = "Juice Framework example for RNN Regression using Mackey-Glass Dat
 version = "0.0.1"
 authors = ["Lissa Hyacinth "]
 edition = "2018"
+publish = false
 
 [dependencies]
 greenglas = { version = "0.2" }
diff --git a/juice-examples/mnist-image-multiclass-classification/Cargo.toml b/juice-examples/mnist-image-multiclass-classification/Cargo.toml
index 4ceac56b0..ae953b24d 100644
--- a/juice-examples/mnist-image-multiclass-classification/Cargo.toml
+++ b/juice-examples/mnist-image-multiclass-classification/Cargo.toml
@@ -6,6 +6,7 @@ authors = ["Bernhard Schuster ",
            "Michael Hirn",
            "Maximilian Goisser"]
 edition = "2018"
+publish = false
 
 [dependencies]
 greenglas = { version = "0.2" }
diff --git a/rcublas/cublas/Cargo.toml b/rcublas/cublas/Cargo.toml
index 54eab2529..6046081dd 100644
--- a/rcublas/cublas/Cargo.toml
+++ b/rcublas/cublas/Cargo.toml
@@ -19,6 +19,7 @@ libc = "0.2"
 rcublas-sys = { version = "0.4", path = "../cublas-sys" }
 lazy_static = "1"
 log = "0.4"
+thiserror = "1.0"
 
 [dev-dependencies]
 coaster = { path = "../../coaster", default-features = false, features = ["cuda", "native"], version = "0.1" }
diff --git a/rcublas/cublas/src/api/level1.rs b/rcublas/cublas/src/api/level1.rs
index 82d081957..b75041a29 100644
--- a/rcublas/cublas/src/api/level1.rs
+++ b/rcublas/cublas/src/api/level1.rs
@@ -38,7 +38,8 @@ impl API {
             cublasStatus_t::CUBLAS_STATUS_ALLOC_FAILED => Err(Error::AllocFailed),
             cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch),
             cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed),
-            _ => Err(Error::Unknown("Unable to calculate sum of x.")),
+            status => Err(Error::Unknown("Unable to calculate sum of x.", status as i32 as u64)),
+
         }
     }
 
@@ -80,7 +81,8 @@ impl API {
             cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized),
             cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch),
             cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed),
-            _ => Err(Error::Unknown("Unable to calculate axpy (alpha * x + y).")),
+            status => Err(Error::Unknown("Unable to calculate axpy (alpha * x + y).", status as i32 as u64)),
+
         }
     }
@@ -119,7 +121,8 @@ impl API {
             cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized),
             cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch),
             cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed),
-            _ => Err(Error::Unknown("Unable to calculate copy from x to y.")),
+            status => Err(Error::Unknown("Unable to calculate copy from x to y.", status as i32 as u64)),
+
         }
     }
 
@@ -154,7 +157,8 @@ impl API {
             cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized),
             cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch),
             cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed),
-            _ => Err(Error::Unknown("Unable to calculate dot product of x and y.")),
+            status => Err(Error::Unknown("Unable to calculate dot product of x and y.", status as i32 as u64)),
+
         }
     }
 
@@ -188,10 +192,10 @@ impl API {
             },
             cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch),
             cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed),
-            _ => {
+            status => {
                 dbg!("Unknown!");
                 Err(Error::Unknown(
-                    "Unable to calculate the euclidian norm of x.",
+                    "Unable to calculate the euclidean norm of x.", status as i32 as u64
                 ))
             },
         }
     }
 
@@ -228,7 +232,8 @@ impl API {
             cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized),
             cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch),
             cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed),
-            _ => Err(Error::Unknown("Unable to scale the vector x.")),
+            status => Err(Error::Unknown("Unable to scale the vector x.", status as i32 as u64)),
+
         }
     }
 
@@ -261,7 +266,8 @@ impl API {
             cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized),
             cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch),
             cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed),
-            _ => Err(Error::Unknown("Unable to swap vector x and y.")),
+            status => Err(Error::Unknown("Unable to swap vector x and y.", status as i32 as u64)),
+
         }
     }
 }
diff --git a/rcublas/cublas/src/api/level3.rs b/rcublas/cublas/src/api/level3.rs
index b9aa76fcc..b978dda60 100644
--- a/rcublas/cublas/src/api/level3.rs
+++ b/rcublas/cublas/src/api/level3.rs
@@ -87,7 +87,8 @@ impl API {
             ),
             cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch),
             cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed),
-            _ => Err(Error::Unknown("Unable to calculate axpy (alpha * x + y).")),
+            status => Err(Error::Unknown("Unable to calculate axpy (alpha * x + y).", status as i32 as u64)),
+
         }
     }
 }
diff --git a/rcublas/cublas/src/api/util.rs b/rcublas/cublas/src/api/util.rs
index 804c4025f..40e63d395 100644
--- a/rcublas/cublas/src/api/util.rs
+++ b/rcublas/cublas/src/api/util.rs
@@ -38,7 +38,7 @@ impl TryFrom for Cookie {
         if let Some(nn) = NonNull::new(handle) {
             Ok(Cookie(nn))
         } else {
-            Err(Error::Unknown("cublasHandle is a nullptr"))
+            Err(Error::Unknown("cublasHandle is a nullptr", 0))
         }
     }
 }
 
@@ -113,8 +113,9 @@ impl API {
         let version_ptr: *mut i32 = &mut version;
         match cublasGetVersion_v2(handle, version_ptr) {
             cublasStatus_t::CUBLAS_STATUS_SUCCESS => Ok(version),
-            cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::Unknown("Unable to initialise CUBLAS Library")),
-            _ => Err(Error::Unknown("Other Unknown Error with CUBLAS Get Version")),
+            cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized),
+            status => Err(Error::Unknown("Other Unknown Error with CUBLAS Get Version", status as i32 as u64)),
+
         }
     }
@@ -139,8 +140,8 @@ impl API {
             cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized),
             cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch),
             cublasStatus_t::CUBLAS_STATUS_ALLOC_FAILED => Err(Error::AllocFailed),
-            _ => Err(Error::Unknown(
-                "Unable to create the cuBLAS context/resources.",
+            status => Err(Error::Unknown("Unable to create the cuBLAS context/resources.", status as i32 as u64
             )),
         }
     }
 
@@ -149,8 +150,8 @@ impl API {
         match cublasDestroy_v2(handle) {
             cublasStatus_t::CUBLAS_STATUS_SUCCESS => Ok(()),
             cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized),
-            _ => Err(Error::Unknown(
-                "Unable to destroy the CUDA cuDNN context/resources.",
+            status => Err(Error::Unknown("Unable to destroy the cuBLAS context/resources.", status as i32 as u64
             )),
         }
     }
 
@@ -160,7 +161,8 @@ impl API {
         match cublasGetPointerMode_v2(handle, pointer_mode.as_mut_ptr()) {
             cublasStatus_t::CUBLAS_STATUS_SUCCESS => Ok(pointer_mode[0]),
             cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized),
-            _ => Err(Error::Unknown("Unable to get cuBLAS pointer mode.")),
+            status => Err(Error::Unknown("Unable to get cuBLAS pointer mode.", status as i32 as u64)),
+
         }
     }
 
@@ -171,7 +173,8 @@ impl API {
         match cublasSetPointerMode_v2(handle, pointer_mode) {
             cublasStatus_t::CUBLAS_STATUS_SUCCESS => Ok(()),
             cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized),
-            _ => Err(Error::Unknown("Unable to get cuBLAS pointer mode.")),
+            status => Err(Error::Unknown("Unable to set cuBLAS pointer mode.", status as i32 as u64)),
+
         }
     }
diff --git a/rcublas/cublas/src/error.rs b/rcublas/cublas/src/error.rs
index aa94f4a87..5fde9f5a1 100644
--- a/rcublas/cublas/src/error.rs
+++ b/rcublas/cublas/src/error.rs
@@ -1,82 +1,39 @@
 //! Provides Rust Errors for every cuBLAS status.
-use std::{fmt, error};
+#[allow(unused)]
+pub type Result<T> = std::result::Result<T, Error>;
 
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Copy, Clone, thiserror::Error)]
 /// Defines cuBLAS errors.
 pub enum Error {
     /// Failure with cuBLAS initialization.
+    #[error("CUDA Driver/Runtime API not initialized.")]
     NotInitialized,
     /// Failure with allocation.
+    #[error("The resources could not be allocated.")]
     AllocFailed,
     /// Failure with cuDNN.
+    #[error("Internal: {0}")]
     InternalError(&'static str),
     /// Failure with provided value.
+    #[error("Invalid value: {0}")]
     InvalidValue(&'static str),
     /// Failure with the hardware architecture.
+    #[error("cuBLAS only supports devices with compute capabilities greater than or equal to 1.3.")]
     ArchMismatch,
     /// Failure with memory access or internal error/bug.
+    #[error("There was an error accessing GPU memory.")]
     MappingError,
     /// Failure with Kernel execution.
+    #[error("Execution failed to launch on the GPU.")]
     ExecutionFailed,
     /// Failure with an unsupported request.
+    #[error("Not supported: {0}")]
     NotSupported(&'static str),
     /// Failure CUDA License.
+    #[error("There is an error with the license. Check that it is present, unexpired and the NVIDIA_LICENSE_FILE environment variable has been set correctly.")]
     LicenseError,
     /// Failure
-    Unknown(&'static str),
-}
-
-impl fmt::Display for Error {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let msg = match *self {
-            Error::NotInitialized => "Failure with cuBLAS initialization.".to_string(),
-            Error::AllocFailed => "Failure with allocation.".to_string(),
-            Error::InternalError(ref err) => (*err).to_string(),
-            Error::InvalidValue(ref err) => (*err).to_string(),
-            Error::ArchMismatch => "Failure with the hardware architecture.".to_string(),
-            Error::MappingError => "Failure with memory access or internal error/bug.".to_string(),
-            Error::ExecutionFailed => "Failure with Kernel execution.".to_string(),
-            Error::NotSupported(ref err) => (*err).to_string(),
-            Error::LicenseError => "Failure CUDA License".to_string(),
-            Error::Unknown(ref err) => (*err).to_string(),
-        };
-        write!(f, "{:?}", msg)
-    }
-}
-
-impl error::Error for Error {
-    fn description(&self) -> &str {
-        match *self {
-            Error::NotInitialized => "CUDA Driver/Runtime API not initialized.",
-            Error::AllocFailed => "The resources could not be allocated.",
-            Error::InternalError(ref err) => err,
-            Error::InvalidValue(ref err) => err,
-            Error::ArchMismatch => {
-                "cuBLAS only supports devices with compute capabilities greater than or equal to 1.3."
-            }
-            Error::MappingError => "There was an error accessing GPU memory.",
-            Error::ExecutionFailed => "Execution failed to launch on the GPU.",
-            Error::NotSupported(ref err) => err,
-            Error::LicenseError => {
-                "There is an error with the license. Check that it is present, unexpired and the NVIDIA_LICENSE_FILE environment variable has been set correctly."
-            }
-            Error::Unknown(ref err) => err,
-        }
-    }
-
-    fn cause(&self) -> Option<&dyn error::Error> {
-        match *self {
-            Error::NotInitialized => None,
-            Error::AllocFailed => None,
-            Error::InternalError(_) => None,
-            Error::InvalidValue(_) => None,
-            Error::ArchMismatch => None,
-            Error::MappingError => None,
-            Error::ExecutionFailed => None,
-            Error::NotSupported(_) => None,
-            Error::LicenseError => None,
-            Error::Unknown(_) => None,
-        }
-    }
+    #[error("Unknown error: {0} - code {1}")]
+    Unknown(&'static str, u64),
 }
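[Note: with the status code preserved, callers no longer lose the raw `cublasStatus_t` value. A hypothetical caller — the function and module path are illustrative, only the `Error` type comes from the patch; `log` is already a dependency of rcublas:

    use rcublas::error::Error;

    fn report(e: &Error) {
        match e {
            // The u64 payload is the raw cublasStatus_t, stored via
            // `status as i32 as u64` at the FFI boundary.
            Error::Unknown(msg, code) => log::error!("cuBLAS: {} (raw status {})", msg, code),
            other => log::error!("cuBLAS: {}", other),
        }
    }
]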
diff --git a/rcudnn/cudnn/Cargo.toml b/rcudnn/cudnn/Cargo.toml
index 47b096920..ba31c70d6 100644
--- a/rcudnn/cudnn/Cargo.toml
+++ b/rcudnn/cudnn/Cargo.toml
@@ -19,6 +19,7 @@ license = "MIT OR Apache-2.0"
 libc = "0.2"
 rcudnn-sys = { version = "0.4", path = "../cudnn-sys" }
 num = "0.1"
+thiserror = "1.0"
 
 [dev-dependencies]
 coaster = { default-features = false, features = ["native", "cuda"], version = "0.1" }
diff --git a/rcudnn/cudnn/src/api/activation.rs b/rcudnn/cudnn/src/api/activation.rs
index 1faae34df..b2b6fcfbf 100644
--- a/rcudnn/cudnn/src/api/activation.rs
+++ b/rcudnn/cudnn/src/api/activation.rs
@@ -92,7 +92,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()),
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("`mode` is invalid or dimensions of input and output tensor differ or `data_type` or strides of the tensors differ.")),
             cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")),
-            _ => Err(Error::Unknown("Unable to compute activation forward.")),
+            status => Err(Error::Unknown("Unable to compute activation forward.", status as i32 as u64)),
+
         }
     }
@@ -116,7 +117,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("`mode` is invalid or dimensions of input and output tensor differ or `data_type` or strides of the tensors differ.")),
             cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("`mode` is invalid or dimensions of input and output tensor differ or `data_type` or strides of the tensors differ.")),
             cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")),
-            _ => Err(Error::Unknown("Unable to compute activation backward.")),
+            status => Err(Error::Unknown("Unable to compute activation backward.", status as i32 as u64)),
+
         }
     }
 
@@ -127,8 +129,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => {
                 Err(Error::AllocFailed("The resources could not be allocated."))
             }
-            _ => Err(Error::Unknown(
-                "Unable to create generic CUDA cuDNN Activation Descriptor.",
+            status => Err(Error::Unknown("Unable to create generic CUDA cuDNN Activation Descriptor.", status as i32 as u64
             )),
         }
     }
 
@@ -138,8 +140,8 @@ impl API {
     ) -> Result<(), Error> {
         match cudnnDestroyActivationDescriptor(desc) {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()),
-            _ => Err(Error::Unknown(
-                "Unable to destroy CUDA cuDNN Activation Descriptor.",
+            status => Err(Error::Unknown("Unable to destroy CUDA cuDNN Activation Descriptor.", status as i32 as u64
             )),
         }
     }
 
@@ -155,8 +157,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam(
                 "`window_dim_a`, `padding_a` or `stride_a` has negative element or invalid `mode`.",
             )), // FIXME
-            _ => Err(Error::Unknown(
-                "Unable to set CUDA cuDNN Activation Descriptor.",
+            status => Err(Error::Unknown("Unable to set CUDA cuDNN Activation Descriptor.", status as i32 as u64
             )),
         }
     }
diff --git a/rcudnn/cudnn/src/api/convolution.rs b/rcudnn/cudnn/src/api/convolution.rs
index 232ef4d56..56c3f4cb8 100644
--- a/rcudnn/cudnn/src/api/convolution.rs
+++ b/rcudnn/cudnn/src/api/convolution.rs
@@ -42,8 +42,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => {
                 Err(Error::AllocFailed("The resources could not be allocated."))
             }
-            _ => Err(Error::Unknown(
-                "Unable to create generic CUDA cuDNN Filter Descriptor.",
+            status => Err(Error::Unknown("Unable to create generic CUDA cuDNN Filter Descriptor.", status as i32 as u64
             )),
         }
     }
 
@@ -51,8 +51,8 @@ impl API {
     unsafe fn ffi_destroy_filter_descriptor(desc: cudnnFilterDescriptor_t) -> Result<(), Error> {
         match cudnnDestroyFilterDescriptor(desc) {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()),
-            _ => Err(Error::Unknown(
-                "Unable to destroy CUDA cuDNN Filter Descriptor.",
+            status => Err(Error::Unknown("Unable to destroy CUDA cuDNN Filter Descriptor.", status as i32 as u64
             )),
         }
     }
 
@@ -72,8 +72,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => {
                 Err(Error::NotSupported("`nb_dims` exceeds CUDNN_DIM_MAX."))
             }
-            _ => Err(Error::Unknown(
-                "Unable to set CUDA cuDNN Filter Descriptor.",
+            status => Err(Error::Unknown("Unable to set CUDA cuDNN Filter Descriptor.", status as i32 as u64
             )),
         }
     }
@@ -217,7 +217,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(perf_results),
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: The handle is not allocated properly. The `src-`, `filter-` or `dest-` descriptor is not allocated properly. The `src-`, `filter-` or `dest-` descriptor has fewer than 1 dimension. Either `returnedCount` or `perfResults` is pointing to NULL. The requestedCount is less than 1.")),
             cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => Err(Error::AllocFailed("The resources could not be allocated.")),
-            _ => Err(Error::Unknown("Unable to find CUDA cuDNN Convolution Forward Algorithm.")),
+            status => Err(Error::Unknown("Unable to find CUDA cuDNN Convolution Forward Algorithm.", status as i32 as u64)),
+
         }
     }
 
@@ -235,7 +236,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(size),
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: One of the parameters `handle`, `src_desc`, `filter_desc`, `conv_desc`, `dest_desc` is NULL. The tensor `dest_desc` or `filter_desc` are not of the same dimension as `src_desc`. The tensor `src_desc`, `dest_desc` or `filter_desc` are not of the same data type. The numbers of feature maps of the tensor `src_desc` and `filter_desc` differ. The tensor `src_desc` has a dimension smaller than 3.")),
             cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("The combination of the tensor descriptors, filter descriptor and convolution descriptor is not supported for the specified algorithm.")),
-            _ => Err(Error::Unknown("Unable to get CUDA cuDNN Convolution Forward Workspace size.")),
+            status => Err(Error::Unknown("Unable to get CUDA cuDNN Convolution Forward Workspace size.", status as i32 as u64)),
+
         }
     }
 
@@ -251,7 +253,8 @@ impl API {
             cudnnConvolutionBwdFilterAlgoPerf_t::default(),
         ];
         match cudnnFindConvolutionBackwardFilterAlgorithm(handle, src_desc, dest_desc, conv_desc, filter_desc, 2, &mut 0, perf_results.as_mut_ptr()) {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(perf_results),
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: The handle is not allocated properly. The `src-`, `filter-` or `dest-` descriptor is not allocated properly. The `src-`, `filter-` or `dest-` descriptor has fewer than 1 dimension. Either `returnedCount` or `perfResults` is pointing to NULL. The requestedCount is less than 1.")),
             cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => Err(Error::AllocFailed("The resources could not be allocated.")),
-            _ => Err(Error::Unknown("Unable to find CUDA cuDNN Convolution Backward Filter Algorithm.")),
+            status => Err(Error::Unknown("Unable to find CUDA cuDNN Convolution Backward Filter Algorithm.", status as i32 as u64)),
+
         }
     }
@@ -268,7 +271,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(size),
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: One of the parameters `handle`, `src_desc`, `filter_desc`, `conv_desc`, `dest_desc` is NULL. The tensor `dest_desc` or `filter_desc` are not of the same dimension as `src_desc`. The tensor `src_desc`, `dest_desc` or `filter_desc` are not of the same data type. The numbers of feature maps of the tensor `src_desc` and `filter_desc` differ. The tensor `src_desc` has a dimension smaller than 3.")),
             cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("The combination of the tensor descriptors, filter descriptor and convolution descriptor is not supported for the specified algorithm.")),
-            _ => Err(Error::Unknown("Unable to get CUDA cuDNN Convolution Backward Filter Workspace size.")),
+            status => Err(Error::Unknown("Unable to get CUDA cuDNN Convolution Backward Filter Workspace size.", status as i32 as u64)),
+
         }
     }
 
@@ -287,7 +291,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(perf_results),
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: The handle is not allocated properly. The `src-`, `filter-` or `dest-` descriptor is not allocated properly. The `src-`, `filter-` or `dest-` descriptor has fewer than 1 dimension. Either `returnedCount` or `perfResults` is pointing to NULL. The requestedCount is less than 1.")),
             cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => Err(Error::AllocFailed("The resources could not be allocated.")),
-            _ => Err(Error::Unknown("Unable to find CUDA cuDNN Convolution Backward Data Algorithm.")),
+            status => Err(Error::Unknown("Unable to find CUDA cuDNN Convolution Backward Data Algorithm.", status as i32 as u64)),
+
         }
     }
 
@@ -304,7 +309,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(size),
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: One of the parameters `handle`, `src_desc`, `filter_desc`, `conv_desc`, `dest_desc` is NULL. The tensor `dest_desc` or `filter_desc` are not of the same dimension as `src_desc`. The tensor `src_desc`, `dest_desc` or `filter_desc` are not of the same data type. The numbers of feature maps of the tensor `src_desc` and `filter_desc` differ. The tensor `src_desc` has a dimension smaller than 3.")),
             cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("The combination of the tensor descriptors, filter descriptor and convolution descriptor is not supported for the specified algorithm.")),
-            _ => Err(Error::Unknown("Unable to get CUDA cuDNN Convolution Backward Data Workspace size.")),
+            status => Err(Error::Unknown("Unable to get CUDA cuDNN Convolution Backward Data Workspace size.", status as i32 as u64)),
+
         }
     }
 
@@ -479,8 +485,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => {
                 Err(Error::AllocFailed("The resources could not be allocated."))
             }
-            _ => Err(Error::Unknown(
-                "Unable to create generic CUDA cuDNN Convolution Descriptor.",
+            status => Err(Error::Unknown("Unable to create generic CUDA cuDNN Convolution Descriptor.", status as i32 as u64
             )),
         }
     }
 
@@ -490,8 +496,8 @@ impl API {
     ) -> Result<(), Error> {
         match cudnnDestroyConvolutionDescriptor(desc) {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()),
-            _ => Err(Error::Unknown(
-                "Unable to destroy CUDA cuDNN Convolution Descriptor.",
+            status => Err(Error::Unknown("Unable to destroy CUDA cuDNN Convolution Descriptor.", status as i32 as u64
             )),
         }
     }
@@ -509,7 +515,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()),
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: `desc` is NULL. `array_length` is negative, `mode` or `data_type` is invalid, element of `pad_a` is negative, element of `stride_a` is negative or zero.")),
             cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("At least one of the following conditions are met: `array_length` is greater than CUDNN_DIM_MAX. `upscale_a` contains an element different from 1.")),
-            _ => Err(Error::Unknown("Unable to set CUDA cuDNN Convolution Descriptor.")),
+            status => Err(Error::Unknown("Unable to set CUDA cuDNN Convolution Descriptor.", status as i32 as u64)),
+
         }
     }
 
@@ -548,7+555,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()),
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: At least one of the following is NULL: `handle`, `src_desc`, `filter_desc`, `conv_desc`, `dest_desc`, `src_data`, `alpha`, `beta`. `src_desc` and `dest_desc` have a non-matching number of dimensions. `src_desc` and `filter_desc` have a non-matching number of dimensions. `src_desc` has fewer than three number of dimensions. `src_desc`s number of dimensions is not equal to `conv_desc`s `array_length` + 2. `src_desc` and `filter_desc` have a non-matching number of input feature maps per image. `src_desc`, `filter_desc` and `dest_desc` have a non-matching data type. For some spatial dimension, `filter_desc` has a spatial size that is larger than the input spatial size (including zero-padding size).")),
             cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("At least one of the following conditions are met: `src_desc` or `dest_desc` have negative tensor striding. `src_desc`, `filter_desc` or `dest_desc` has a number of dimensions that is not 4 or 5. The chosen algo does not support the parameters provided; see the reference for exhaustive list of parameter support for each algo")),
-            _ => Err(Error::Unknown("Unable to compute CUDA cuDNN convolutional forward.")),
+            status => Err(Error::Unknown("Unable to compute CUDA cuDNN convolutional forward.", status as i32 as u64)),
+
         }
     }
 
@@ -564,7 +572,8 @@ impl API {
         match cudnnConvolutionBackwardBias(handle, alpha, src_desc, src_data, beta, dest_desc, dest_data) {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()),
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: One of the parameters n,h,w of the output tensor is not 1. The numbers of feature maps of the input tensor and output tensor differ. The dataType of the two tensor descriptors are different.")),
-            _ => Err(Error::Unknown("Unable to compute CUDA cuDNN convolutional backward bias.")),
+            status => Err(Error::Unknown("Unable to compute CUDA cuDNN convolutional backward bias.", status as i32 as u64)),
+
         }
     }
@@ -590,7 +599,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("At least one of the following conditions are met: `src_desc` or `diff_desc` have negative tensor striding. `src_desc`, `diff_desc` or `grad_desc` has a number of dimensions that is not 4 or 5. The chosen algo does not support the parameters provided; see the reference for exhaustive list of parameter support for each algo")),
             cudnnStatus_t::CUDNN_STATUS_MAPPING_ERROR => Err(Error::MappingError("An error occurs during the texture binding of the filter data.")),
             cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")),
-            _ => Err(Error::Unknown("Unable to compute CUDA cuDNN convolutional backward filter.")),
+            status => Err(Error::Unknown("Unable to compute CUDA cuDNN convolutional backward filter.", status as i32 as u64)),
+
         }
     }
 
@@ -616,7+626,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("At least one of the following conditions are met: `diff_desc` or `grad_desc` have negative tensor striding. `diff_desc`, `filter_desc` or `grad_desc` has a number of dimensions that is not 4 or 5. The chosen algo does not support the parameters provided; see the reference for exhaustive list of parameter support for each algo")),
             cudnnStatus_t::CUDNN_STATUS_MAPPING_ERROR => Err(Error::MappingError("An error occurs during the texture binding of the filter data or the input differential tensor data.")),
             cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")),
-            _ => Err(Error::Unknown("Unable to compute CUDA cuDNN convolutional backward data.")),
+            status => Err(Error::Unknown("Unable to compute CUDA cuDNN convolutional backward data.", status as i32 as u64)),
+
         }
     }
 }
diff --git a/rcudnn/cudnn/src/api/cuda.rs b/rcudnn/cudnn/src/api/cuda.rs
index 8ed06be41..9980a5e50 100644
--- a/rcudnn/cudnn/src/api/cuda.rs
+++ b/rcudnn/cudnn/src/api/cuda.rs
@@ -28,8 +28,8 @@ impl API {
             cudaError_t::cudaErrorMemoryAllocation => {
                 Err(Error::AllocFailed("Unable to allocate CUDA device memory."))
             }
-            _ => Err(Error::Unknown(
-                "Unable to allocate CUDA device memory for unknown reasons.",
+            status => Err(Error::Unknown("Unable to allocate CUDA device memory for unknown reasons.", status as i32 as u64
             )),
         }
     }
 
@@ -39,12 +39,13 @@ impl API {
             cudaError_t::cudaSuccess => Ok(()),
             // TODO, more error enums sigh
             cudaError_t::cudaErrorInvalidDevicePointer => {
-                Err(Error::Unknown("Unable to free the CUDA device memory."))
+                Err(Error::InvalidValue("Unable to free the CUDA device memory due to invalid device pointer."))
             }
             cudaError_t::cudaErrorInitializationError => {
-                Err(Error::Unknown("CUDA Driver/Runtime API not initialized."))
+                Err(Error::NotInitialized("CUDA Driver/Runtime API not initialized."))
             }
-            _ => Err(Error::Unknown("Unable to free the CUDA device memory.")),
+            status => Err(Error::Unknown("Unable to free the CUDA device memory.", status as i32 as u64)),
+
         }
     }
 }
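[Note: every wrapper in these cuDNN files now ends with the same `status => Err(Error::Unknown(msg, status as i32 as u64))` tail. If the repetition becomes a maintenance burden, it could be factored into one helper in a follow-up; a possible shape — hypothetical, not part of this patch, and the `use` paths are assumptions matching the crates above:

    use rcudnn_sys::cudnnStatus_t;

    // Hypothetical follow-up: centralize the success/unknown tail so each
    // wrapper only spells out the statuses it can report precisely.
    fn check(status: cudnnStatus_t, msg: &'static str) -> Result<(), Error> {
        match status {
            cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()),
            status => Err(Error::Unknown(msg, status as i32 as u64)),
        }
    }
]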
diff --git a/rcudnn/cudnn/src/api/dropout.rs b/rcudnn/cudnn/src/api/dropout.rs
index 7792d5792..bbe29b8a2 100644
--- a/rcudnn/cudnn/src/api/dropout.rs
+++ b/rcudnn/cudnn/src/api/dropout.rs
@@ -109,8 +109,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => {
                 Err(Error::AllocFailed("The resources could not be allocated"))
             }
-            _ => Err(Error::Unknown(
-                "Unable create generic CUDA cuDNN Dropout Descriptor",
+            status => Err(Error::Unknown("Unable to create generic CUDA cuDNN Dropout Descriptor", status as i32 as u64
             )),
         }
     }
 
@@ -119,8 +119,8 @@ impl API {
     ) -> Result<(), Error> {
         match cudnnDestroyDropoutDescriptor(dropout_desc) {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()),
-            _ => Err(Error::Unknown(
-                "Unable to destroy CUDA cuDNN Dropout Descriptor",
+            status => Err(Error::Unknown("Unable to destroy CUDA cuDNN Dropout Descriptor", status as i32 as u64
             )),
         }
     }
 
@@ -128,8 +128,8 @@ impl API {
         let mut size_in_bytes: usize = 0;
         match cudnnDropoutGetStatesSize(handle, &mut size_in_bytes) {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(size_in_bytes),
-            _ => Err(Error::Unknown(
-                "Unable to get CUDA cuDNN Dropout Descriptor states size",
+            status => Err(Error::Unknown("Unable to get CUDA cuDNN Dropout Descriptor states size", status as i32 as u64
             )),
         }
     }
 
@@ -139,8 +139,8 @@ impl API {
         let mut size_in_bytes: usize = 0;
         match cudnnDropoutGetReserveSpaceSize(xdesc, &mut size_in_bytes) {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(size_in_bytes),
-            _ => Err(Error::Unknown(
-                "Unable to get CUDA cuDNN Dropout Descriptor reserved space size",
+            status => Err(Error::Unknown("Unable to get CUDA cuDNN Dropout Descriptor reserved space size", status as i32 as u64
             )),
         }
     }
 
@@ -167,8 +167,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed(
                 "The function failed to launch on the GPU",
             )),
-            _ => Err(Error::Unknown(
-                "Unable to set CUDA cuDNN Dropout Descriptor",
+            status => Err(Error::Unknown("Unable to set CUDA cuDNN Dropout Descriptor", status as i32 as u64
             )),
         }
     }
 
@@ -204,7 +204,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed(
                 "The function failed to launch on the GPU.",
             )),
-            _ => Err(Error::Unknown("Unable to calculate CUDA cuDNN Dropout forward")),
+            status => Err(Error::Unknown("Unable to calculate CUDA cuDNN Dropout forward", status as i32 as u64)),
+
         }
     }
 
@@ -239,7 +240,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed(
                 "The function failed to launch on the GPU.",
             )),
-            _ => Err(Error::Unknown("Unable to calculate CUDA cuDNN Dropout backward")),
+            status => Err(Error::Unknown("Unable to calculate CUDA cuDNN Dropout backward", status as i32 as u64)),
+
         }
     }
 }
diff --git a/rcudnn/cudnn/src/api/normalization.rs b/rcudnn/cudnn/src/api/normalization.rs
index 1275cfc36..0e59ad58b 100644
--- a/rcudnn/cudnn/src/api/normalization.rs
+++ b/rcudnn/cudnn/src/api/normalization.rs
@@ -142,8 +142,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => {
                 Err(Error::AllocFailed("The resources could not be allocated."))
             }
-            _ => Err(Error::Unknown(
-                "Unable to create generic CUDA cuDNN LRN Descriptor.",
+            status => Err(Error::Unknown("Unable to create generic CUDA cuDNN LRN Descriptor.", status as i32 as u64
             )),
         }
     }
 
@@ -151,8 +151,8 @@ impl API {
     unsafe fn ffi_destroy_lrn_descriptor(desc: cudnnLRNDescriptor_t) -> Result<(), Error> {
         match cudnnDestroyLRNDescriptor(desc) {
             cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()),
-            _ => Err(Error::Unknown(
-                "Unable to destroy CUDA cuDNN LRN Descriptor.",
+            status => Err(Error::Unknown("Unable to destroy CUDA cuDNN LRN Descriptor.", status as i32 as u64
             )),
         }
     }
 
@@ -169,8 +169,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam(
                 "One of the input parameters was out of range.",
             )),
-            _ => Err(Error::Unknown(
-                "Unable to set CUDA cuDNN Pooling Descriptor.",
+            status => Err(Error::Unknown("Unable to set CUDA cuDNN LRN Descriptor.", status as i32 as u64
             )),
         }
     }
@@ -192,7 +192,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: One of the tensor pointers `src_data`, `dest_data` is NULL. Number of input tensor dimensions is 2 or less. LRN Descriptor params are out of valid range. Input Tensor is 5D but is not NCDHW DHW-packed format.")),
             cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("`data_type`, `sride`, `dimensions` mismatch or tensor strides are negative.")),
             cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")),
-            _ => Err(Error::Unknown("Unable to compute LRN cross channel forward.")),
+            status => Err(Error::Unknown("Unable to compute LRN cross channel forward.", status as i32 as u64)),
+
         }
     }
 
@@ -217,7+218,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: One of the tensor pointers `src_data`, `dest_data` is NULL. Number of input tensor dimensions is 2 or less. LRN Descriptor params are out of valid range. Input Tensor is 5D but is not NCDHW DHW-packed format.")),
             cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("`data_type`, `sride`, `dimensions` mismatch or tensor strides are negative.")),
             cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")),
-            _ => Err(Error::Unknown("Unable to compute LRN cross channel backward.")),
+            status => Err(Error::Unknown("Unable to compute LRN cross channel backward.", status as i32 as u64)),
+
         }
     }
 
@@ -241,7 +243,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: One of the tensor pointers `src_data`, `dest_data`, `tmp_data`, `tmp_data2` is NULL. Number of input tensor or output tensor dimensions is outside of [4,5] range. A mismatch in dimensions between any two of the input or output tensors. For in-place computation (`src_data` == `dest_data`), a mismatch in strides between the input data and output data tensors. Alpha or beta pointer is NULL. LRN descriptor parameters are outside or their valid ranges. Any of the tensor strides are negative.")),
             cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("stried of the input and output tensors mismatch.")),
             cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")),
-            _ => Err(Error::Unknown("Unable to compute divisive normalization forward.")),
+            status => Err(Error::Unknown("Unable to compute divisive normalization forward.", status as i32 as u64)),
+
         }
     }
@@ -267,7 +270,8 @@ impl API {
             cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: One of the tensor pointers `src_data`, `dest_data`, `tmp_data`, `tmp_data2` is NULL. Number of input tensor or output tensor dimensions is outside of [4,5] range. A mismatch in dimensions between any two of the input or output tensors. For in-place computation (`src_data` == `dest_data`), a mismatch in strides between the input data and output data tensors. Alpha or beta pointer is NULL. LRN descriptor parameters are outside or their valid ranges. Any of the tensor strides are negative.")),
             cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("`mode` is invalid or dimensions of input and output tensor differ or `data_type` or strides of the tensors differ.")),
             cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")),
-            _ => Err(Error::Unknown("Unable to compute divisive normalization backward.")),
+            status => Err(Error::Unknown("Unable to compute divisive normalization backward.", status as i32 as u64)),
+
         }
     }
 }
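[Note: since the user-visible change in most of these arms is the message plus the preserved status code, a small unit test can pin the behaviour down. A sketch only — it assumes the `Unknown` display format includes its code, as the cuBLAS variant earlier in this patch does:

    #[cfg(test)]
    mod tests {
        use super::Error;

        #[test]
        fn unknown_keeps_the_raw_status_code() {
            // 700 is an arbitrary example value standing in for a raw status.
            let err = Error::Unknown("Unable to compute divisive normalization backward.", 700);
            let shown = format!("{}", err);
            assert!(shown.contains("700"), "code should survive into Display: {}", shown);
        }
    }
]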
impl API { cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: The dimensions n, c of the input tensor and output tensors differ. The datatype of the input tensor and output tensors differs.")), cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("The `w_stride` of input tensor or output tensor is not 1.")), cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")), - _ => Err(Error::Unknown("Unable to compute pooling forward.")), + status => Err(Error::Unknown("Unable to compute pooling forward.", status as i32 as u64)), + } } @@ -395,7 +397,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: The dimensions n,c,h,w of the `src_desc` and `src_diff_desc` tensors differ. The strides nStride, cStride, hStride, wStride of the `src_desc` and `src_diff_desc` tensors differ. The dimensions n,c,h,w of the `dest_desc` and `dest_diff_desc` tensors differ. The strides nStride, cStride, hStride, wStride of the `dest_desc` and `dest_diff_desc` tensors differ. The datatype of the four tensors differ.")), cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("The `w_stride` of input tensor or output tensor is not 1.")), cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")), - _ => Err(Error::Unknown("Unable to compute pooling backward.")), + status => Err(Error::Unknown("Unable to compute pooling backward.", status as i32 as u64)), + } } } diff --git a/rcudnn/cudnn/src/api/rnn.rs b/rcudnn/cudnn/src/api/rnn.rs index 04757227b..61b4eb599 100644 --- a/rcudnn/cudnn/src/api/rnn.rs +++ b/rcudnn/cudnn/src/api/rnn.rs @@ -46,7 +46,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(size), cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: One of the parameters `x_desc`, `rnn_desc` is NULL. The tensors in `x_desc` are not of the same data type. The batch size of the tensors `x_desc` are not decreasing or staying constant.")), cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("The data type used in `src_desc` is not supported for RNN.")), - _ => Err(Error::Unknown("Unable to get CUDA cuDNN RNN Forward Workspace size.")), + status => Err(Error::Unknown("Unable to get CUDA cuDNN RNN Forward Workspace size.", status as i32 as u64)), + } } @@ -82,7 +83,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(size), cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: One of the parameters `handle`, `x_desc`, `rnn_desc` is NULL. The tensors in `x_desc` are not of the same data type. 
The batch size of the tensors `x_desc` are not decreasing or staying constant.")), cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("The data type used in `src_desc` is not supported for RNN.")), - _ => Err(Error::Unknown("Unable to get CUDA cuDNN RNN Training Reserve size.")), + status => Err(Error::Unknown("Unable to get CUDA cuDNN RNN Training Reserve size.", status as i32 as u64)), + } } /// cudnnGetRNNParamsSize[1] @@ -120,7 +122,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(size), cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("One of the following; rnnDesc is invalid, x_desc is invalid, x_desc isn't fully packed, dataType & tensor Description type don't match")), cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("The data type used in `rnn_desc` is not supported for RNN.")), - _ => Err(Error::Unknown("Unable to get CUDA cuDNN RNN Params Size")), + status => Err(Error::Unknown("Unable to get CUDA cuDNN RNN Params Size.", status as i32 as u64)), + } } } @@ -138,8 +141,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => { Err(Error::AllocFailed("The resources could not be allocated")) } - _ => Err(Error::Unknown( - "Unable create generic CUDA cuDNN RNN Descriptor", + status => Err(Error::Unknown("Unable to create generic CUDA cuDNN RNN Descriptor", status as i32 as u64 + )), } } @@ -155,7 +158,8 @@ impl API { let mut rnn_data_descriptor: cudnnRNNDataDescriptor_t = ::std::ptr::null_mut(); match cudnnCreateRNNDataDescriptor(&mut rnn_data_descriptor) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(rnn_data_descriptor), - _ => Err(Error::Unknown("Unable to create Data Descriptor")) + status => Err(Error::Unknown("Unable to create RNN Data Descriptor", status as i32 as u64)), + } } @@ -168,8 +172,8 @@ impl API { unsafe fn ffi_destroy_rnn_descriptor(rnn_desc: cudnnRNNDescriptor_t) -> Result<(), Error> { match cudnnDestroyRNNDescriptor(rnn_desc) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), - _ => Err(Error::Unknown( - "Unable to destroy CUDA cuDNN Dropout Descriptor", + status => Err(Error::Unknown("Unable to destroy CUDA cuDNN RNN Descriptor", status as i32 as u64 + )), } } @@ -237,7 +241,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("FIXME RNN")), cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("FIXME RNN")), - _ => Err(Error::Unknown("Unable to set CUDA cuDNN RNN Descriptor.")), + status => Err(Error::Unknown("Unable to set CUDA cuDNN RNN Descriptor.", status as i32 as u64)), + } } @@ -259,7 +264,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("FIXME RNN")), cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("FIXME RNN")), - _ => Err(Error::Unknown("Unable to set CUDA cuDNN RNN Matrix Math Type.")), + status => Err(Error::Unknown("Unable to set CUDA cuDNN RNN Matrix Math Type.", status as i32 as u64)), + } } @@ -283,7 +289,8 @@ impl API { ) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("cudnnSetRnnPaddingMode - Bad Param - Either RNN Desc is Null or paddingMode has an invalid enum (Unlikely due to Bindgen).
Likely RNN Desc is somehow NULL")), - _ => Err(Error::Unknown("Unable to set CUDA cuDNN RNN Padding Mode.")), + status => Err(Error::Unknown("Unable to set CUDA cuDNN RNN Padding Mode.", status as i32 as u64)), + } } } @@ -423,7 +430,10 @@ impl API { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions was met: rnnDesc is invalid, hx_desc, w_desc, hy_desc, cy_desc, or one of the x_desc or y_desc is invalid. The descriptors for x_desc, cx_desc, _hx_desc, w_desc, y_desc, hy_desc, cy_desc have incorrect strides/dimensions. Workspace size is too small. Reserve space size is too small.")), cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("At least one of the following conditions are met: `src_desc` or `dest_desc` have negative tensor striding. `src_desc`, `rnn_desc` or `dest_desc` has a number of dimensions that is not 4 or 5. The chosen algo does not support the parameters provided; see the reference for exhaustive list of parameter support for each algo")), - _ => Err(Error::Unknown("Unable to compute CUDA cuDNN rnnal forward.")), + cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("The function failed to launch on the GPU.")), + cudnnStatus_t::CUDNN_STATUS_INVALID_VALUE => Err(Error::InvalidValue("cudnnSetPersistentRNNPlan() was not called prior to the current function when CUDNN_RNN_ALGO_PERSIST_DYNAMIC was selected in the RNN descriptor.")), + cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => Err(Error::AllocFailed("The function was unable to allocate memory.")), + status => Err(Error::Unknown("Unable to compute CUDA cuDNN rnn forward.", status as i32 as u64)), } } @@ -554,7 +564,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: At least one of the following is NULL: `handle`, `src_desc`, `rnn_desc`, `conv_desc`, `dest_desc`, `src_data`, `alpha`, `beta`. `src_desc` and `dest_desc` have a non-matching number of dimensions. `src_desc` and `rnn_desc` have a non-matching number of dimensions. `src_desc` has fewer than three number of dimensions. `src_desc`s number of dimensions is not equal to `conv_desc`s `array_length` + 2. `src_desc` and `rnn_desc` have a non-matching number of input feature maps per image. `src_desc`, `rnn_desc` and `dest_desc` have a non-matching data type. For some spatial dimension, `rnn_desc` has a spatial size that is larger than the input spatial size (including zero-padding size).")), cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("At least one of the following conditions are met: `src_desc` or `dest_desc` have negative tensor striding. `src_desc`, `rnn_desc` or `dest_desc` has a number of dimensions that is not 4 or 5. The chosen algo does not support the parameters provided; see the reference for exhaustive list of parameter support for each algo")), - _ => Err(Error::Unknown("Unable to compute CUDA cuDNN rnnal forward.")), + status => Err(Error::Unknown("Unable to compute CUDA cuDNN rnn forward.", status as i32 as u64)), + } } } @@ -730,7 +741,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("At least one of the following conditions are met: `diff_desc` or `grad_desc` have negative tensor striding. `diff_desc`, `rnn_desc` or `grad_desc` has a number of dimensions that is not 4 or 5.
The chosen algo does not support the parameters provided; see the reference for exhaustive list of parameter support for each algo")), cudnnStatus_t::CUDNN_STATUS_MAPPING_ERROR => Err(Error::MappingError("An error occurs during the texture binding of the rnn data or the input differential tensor data.")), cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")), - _ => Err(Error::Unknown("Unable to compute CUDA cuDNN rnnal backward data.")), + status => Err(Error::Unknown("Unable to compute CUDA cuDNN rnn backward data.", status as i32 as u64)), + } } @@ -839,7 +851,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("At least one of the following conditions are met: `src_desc` or `diff_desc` have negative tensor striding. `src_desc`, `diff_desc` or `grad_desc` has a number of dimensions that is not 4 or 5. The chosen algo does not support the parameters provided; see the reference for exhaustive list of parameter support for each algo")), cudnnStatus_t::CUDNN_STATUS_MAPPING_ERROR => Err(Error::MappingError("An error occurs during the texture binding of the rnn data.")), cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")), - _ => Err(Error::Unknown("Unable to compute CUDA cuDNN rnnal backward rnn.")), + status => Err(Error::Unknown("Unable to compute CUDA cuDNN rnn backward weights.", status as i32 as u64)), + } } } diff --git a/rcudnn/cudnn/src/api/softmax.rs b/rcudnn/cudnn/src/api/softmax.rs index 0456eb074..993664f45 100644 --- a/rcudnn/cudnn/src/api/softmax.rs +++ b/rcudnn/cudnn/src/api/softmax.rs @@ -72,7 +72,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("`algorithm` or `mode` are invalid or dimensions or data types of input and output tensor differ or `data_type` or strides of the tensors differ.")), cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")), - _ => Err(Error::Unknown("Unable to compute softmax forward.")), + status => Err(Error::Unknown("Unable to compute softmax forward.", status as i32 as u64)), + } } @@ -94,7 +95,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("`algorithm` or `mode` are invalid or dimensions or data types of input and output tensor differ or `data_type` or strides of the tensors differ.")), cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")), - _ => Err(Error::Unknown("Unable to compute softmax backward.")), + status => Err(Error::Unknown("Unable to compute softmax backward.", status as i32 as u64)), + } } } diff --git a/rcudnn/cudnn/src/api/tensor.rs b/rcudnn/cudnn/src/api/tensor.rs index 043459e07..d5376aa9b 100644 --- a/rcudnn/cudnn/src/api/tensor.rs +++ b/rcudnn/cudnn/src/api/tensor.rs @@ -134,8 +134,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => { Err(Error::AllocFailed("The resources could not be allocated.")) } - _ => Err(Error::Unknown( - "Unable to create generic CUDA cuDNN Tensor Descriptor.", + status => Err(Error::Unknown("Unable to create generic CUDA cuDNN Tensor Descriptor.", status as i32 as u64 + )), } } @@ -145,8 +145,8 @@ impl API { ) -> Result<(), Error> { match cudnnDestroyTensorDescriptor(tensor_desc) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), - _ => Err(Error::Unknown( - "Unable to destroy
CUDA cuDNN Tensor Descriptor context.", + status => Err(Error::Unknown("Unable to destroy CUDA cuDNN Tensor Descriptor.", status as i32 as u64 + )), } } @@ -166,8 +166,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported( "`nb_dims` exceeds CUDNN_DIM_MAX or 2 Giga-elements.", )), - _ => Err(Error::Unknown( - "Unable to set CUDA cuDNN Tensor Descriptor.", + status => Err(Error::Unknown("Unable to set CUDA cuDNN Tensor Descriptor.", status as i32 as u64 + )), } } @@ -195,8 +195,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported( "`nb_dims` exceeds CUDNN_DIM_MAX or 2 Giga-elements.", )), - _ => Err(Error::Unknown( - "Unable to set CUDA cuDNN Tensor Descriptor.", + status => Err(Error::Unknown("Unable to set CUDA cuDNN Tensor Descriptor.", status as i32 as u64 + )), } } @@ -214,7 +214,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("the dimensions n, c, h, w or the data type of the two tensor descriptors are different.")), cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")), - _ => Err(Error::Unknown("Unable to transform CUDA cuDNN Tensor.")), + status => Err(Error::Unknown("Unable to transform CUDA cuDNN Tensor.", status as i32 as u64)), + } } @@ -232,7 +233,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("The dimensions of the bias tensor refer to an amount of data that is incompatible with the output tensor dimensions or the data type of the two tensor descriptors are different.")), cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("The dimensions of the bias tensor and the output tensor dimensions are above 5.")), cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed("Execution failed to launch on GPU.")), - _ => Err(Error::Unknown("Unable to add CUDA cuDNN Tensor.")), + status => Err(Error::Unknown("Unable to add CUDA cuDNN Tensor.", status as i32 as u64)), + } } @@ -250,7 +252,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => { Err(Error::ExecutionFailed("Execution failed to launch on GPU.")) } - _ => Err(Error::Unknown("Unable to set CUDA cuDNN Tensor.")), + status => Err(Error::Unknown("Unable to set CUDA cuDNN Tensor.", status as i32 as u64)), + } } @@ -268,7 +271,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => { Err(Error::ExecutionFailed("Execution failed to launch on GPU.")) } - _ => Err(Error::Unknown("Unable to scale CUDA cuDNN Tensor.")), + status => Err(Error::Unknown("Unable to scale CUDA cuDNN Tensor.", status as i32 as u64)), + } } } diff --git a/rcudnn/cudnn/src/api/utils.rs b/rcudnn/cudnn/src/api/utils.rs index 00a89356f..eecad34b3 100644 --- a/rcudnn/cudnn/src/api/utils.rs +++ b/rcudnn/cudnn/src/api/utils.rs @@ -38,7 +38,8 @@ impl API { cudnnStatus_t::CUDNN_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA Driver/Runtime API not initialized.")), cudnnStatus_t::CUDNN_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch("cuDNN only supports devices with compute capabilities greater than or equal to 3.0.")), cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => Err(Error::AllocFailed("The resources could not be allocated.")), - _ => Err(Error::Unknown("Unable to create the CUDA cuDNN context/resources.")) + status => Err(Error::Unknown("Unable to create the CUDA cuDNN context/resources.", status as i32 as u64)), + } } @@ -48,8 +49,8 @@ impl API {
cudnnStatus_t::CUDNN_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized( "CUDA Driver/Runtime API not initialized.", )), - _ => Err(Error::Unknown( - "Unable to destroy the CUDA cuDNN context/resources.", + status => Err(Error::Unknown("Unable to destroy the CUDA cuDNN context/resources.", status as i32 as u64 + )), } } diff --git a/rcudnn/cudnn/src/error.rs b/rcudnn/cudnn/src/error.rs index 131b139fc..3f40a0599 100644 --- a/rcudnn/cudnn/src/error.rs +++ b/rcudnn/cudnn/src/error.rs @@ -1,82 +1,43 @@ //! Provides Rust Errors for CUDA's cuDNN status. -use std::{error, fmt}; +#[allow(unused)] +pub type Result<T> = std::result::Result<T, Error>; -#[derive(Debug, Copy, Clone)] +#[non_exhaustive] +#[derive(Debug, Copy, Clone, thiserror::Error)] /// Defines CUDA's cuDNN errors. pub enum Error { /// Failure with CUDA cuDNN initialization. + #[error("{0:?}")] NotInitialized(&'static str), /// Failure with allocation. + #[error("{0:?}")] AllocFailed(&'static str), /// Failure with a provided parameter. + #[error("{0:?}")] BadParam(&'static str), /// Failure with cuDNN. + #[error("{0:?}")] InternalError(&'static str), /// Failure with provided value. + #[error("{0:?}")] InvalidValue(&'static str), /// Failure with the hardware architecture. + #[error("{0:?}")] ArchMismatch(&'static str), /// Failure with memory access or internal error/bug. + #[error("{0:?}")] MappingError(&'static str), /// Failure with Kernel execution. + #[error("{0:?}")] ExecutionFailed(&'static str), /// Failure with an unsupported request. + #[error("{0:?}")] NotSupported(&'static str), /// Failure with CUDA License. + #[error("{0:?}")] LicenseError(&'static str), /// Failure - Unknown(&'static str), -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::NotInitialized(ref err) => write!(f, "{:?}", err), - Error::AllocFailed(ref err) => write!(f, "{:?}", err), - Error::BadParam(ref err) => write!(f, "{:?}", err), - Error::InternalError(ref err) => write!(f, "{:?}", err), - Error::InvalidValue(ref err) => write!(f, "{:?}", err), - Error::ArchMismatch(ref err) => write!(f, "{:?}", err), - Error::MappingError(ref err) => write!(f, "{:?}", err), - Error::ExecutionFailed(ref err) => write!(f, "{:?}", err), - Error::NotSupported(ref err) => write!(f, "{:?}", err), - Error::LicenseError(ref err) => write!(f, "{:?}", err), - Error::Unknown(ref err) => write!(f, "{:?}", err), - } - } -} - -impl error::Error for Error { - fn description(&self) -> &str { - match *self { - Error::NotInitialized(ref err) => err, - Error::AllocFailed(ref err) => err, - Error::BadParam(ref err) => err, - Error::InternalError(ref err) => err, - Error::InvalidValue(ref err) => err, - Error::ArchMismatch(ref err) => err, - Error::MappingError(ref err) => err, - Error::ExecutionFailed(ref err) => err, - Error::NotSupported(ref err) => err, - Error::LicenseError(ref err) => err, - Error::Unknown(ref err) => err, - } - } - - fn cause(&self) -> Option<&dyn error::Error> { - match *self { - Error::NotInitialized(_) => None, - Error::AllocFailed(_) => None, - Error::BadParam(_) => None, - Error::InternalError(_) => None, - Error::InvalidValue(_) => None, - Error::ArchMismatch(_) => None, - Error::MappingError(_) => None, - Error::ExecutionFailed(_) => None, - Error::NotSupported(_) => None, - Error::LicenseError(_) => None, - Error::Unknown(_) => None, - } - } + #[error("{0:?}: {1}")] + Unknown(&'static str, u64), } From e5291dcca69e81521b28da2e0ad86f13e9ede6a4 Mon Sep 17 00:00:00 2001 From: Bernhard Schuster Date: Wed, 31
Mar 2021 18:57:50 +0200 Subject: [PATCH 2/2] chore/fmt: cargo fmt --- .vscode/settings.json | 19 + coaster-blas/src/frameworks/cuda/helper.rs | 12 +- coaster-blas/tests/blas_specs.rs | 1 - coaster-nn/src/frameworks/cuda/helper.rs | 20 +- coaster-nn/src/frameworks/cuda/mod.rs | 1579 +++++++++-------- coaster-nn/src/frameworks/native/helper.rs | 252 +-- coaster-nn/src/frameworks/native/mod.rs | 967 +++++----- coaster-nn/src/lib.rs | 10 +- coaster-nn/src/plugin.rs | 365 ++-- coaster-nn/src/tests/activation.rs | 336 ++-- coaster-nn/src/tests/bench_all.rs | 41 +- coaster-nn/src/tests/convolutional.rs | 189 +- coaster-nn/src/tests/dropout.rs | 41 +- coaster-nn/src/tests/mod.rs | 146 +- coaster-nn/src/tests/pooling.rs | 172 +- coaster-nn/src/tests/softmax.rs | 192 +- coaster/benches/shared_tensor.rs | 57 +- coaster/examples/readme.rs | 24 +- coaster/src/backend.rs | 26 +- coaster/src/device.rs | 33 +- coaster/src/framework.rs | 12 +- .../src/frameworks/cuda/api/driver/context.rs | 96 +- .../src/frameworks/cuda/api/driver/device.rs | 188 +- coaster/src/frameworks/cuda/api/driver/ffi.rs | 727 ++++---- .../src/frameworks/cuda/api/driver/memory.rs | 116 +- coaster/src/frameworks/cuda/api/driver/mod.rs | 4 +- .../src/frameworks/cuda/api/driver/utils.rs | 18 +- coaster/src/frameworks/cuda/api/mod.rs | 2 +- coaster/src/frameworks/cuda/context.rs | 36 +- coaster/src/frameworks/cuda/device.rs | 30 +- coaster/src/frameworks/cuda/memory.rs | 4 +- coaster/src/frameworks/cuda/mod.rs | 47 +- coaster/src/frameworks/cuda/module.rs | 4 +- coaster/src/frameworks/mod.rs | 8 +- coaster/src/frameworks/native/binary.rs | 4 +- coaster/src/frameworks/native/device.rs | 34 +- coaster/src/frameworks/native/error.rs | 3 +- coaster/src/frameworks/native/flatbox.rs | 18 +- coaster/src/frameworks/native/hardware.rs | 7 +- coaster/src/frameworks/native/mod.rs | 25 +- .../src/frameworks/native/unstable_alloc.rs | 4 +- coaster/src/frameworks/opencl/api/context.rs | 246 +-- coaster/src/frameworks/opencl/api/device.rs | 110 +- coaster/src/frameworks/opencl/api/error.rs | 2 +- coaster/src/frameworks/opencl/api/ffi.rs | 207 ++- coaster/src/frameworks/opencl/api/memory.rs | 92 +- coaster/src/frameworks/opencl/api/mod.rs | 4 +- coaster/src/frameworks/opencl/api/platform.rs | 20 +- coaster/src/frameworks/opencl/api/queue.rs | 81 +- coaster/src/frameworks/opencl/api/types.rs | 579 +++--- coaster/src/frameworks/opencl/context.rs | 68 +- coaster/src/frameworks/opencl/device.rs | 46 +- coaster/src/frameworks/opencl/kernel.rs | 2 +- coaster/src/frameworks/opencl/memory.rs | 4 +- coaster/src/frameworks/opencl/mod.rs | 32 +- coaster/src/frameworks/opencl/program.rs | 10 +- coaster/src/frameworks/opencl/queue.rs | 20 +- coaster/src/hardware.rs | 2 +- coaster/src/lib.rs | 33 +- coaster/src/tensor.rs | 113 +- coaster/tests/backend_specs.rs | 4 +- coaster/tests/compiletests.rs | 3 +- coaster/tests/framework_cuda_specs.rs | 2 +- coaster/tests/framework_opencl_specs.rs | 26 +- coaster/tests/hardware_specs.rs | 12 +- coaster/tests/shared_memory_specs.rs | 38 +- coaster/tests/tensor_specs.rs | 14 +- greenglas/src/image/mod.rs | 33 +- greenglas/src/image/modifiers.rs | 8 +- greenglas/src/lib.rs | 9 +- greenglas/src/transformer.rs | 18 +- greenglas/src/word/mod.rs | 1 - greenglas/src/word/modifiers.rs | 1 + greenglas/tests/image_spec.rs | 45 +- greenglas/tests/transformer_spec.rs | 29 +- greenglas/tests/word_spec.rs | 7 +- juice-examples/juice-utils/src/lib.rs | 10 +- .../mackey-glass-rnn-regression/src/main.rs | 1 - .../src/main.rs | 46 +- 
juice/src/layer.rs | 3 +- juice/src/layers/common/convolution.rs | 4 +- juice/src/layers/common/linear.rs | 2 +- juice/src/layers/common/rnn.rs | 25 +- juice/src/layers/loss/mean_squared_error.rs | 2 +- juice/src/util.rs | 4 +- juice/tests/layer_specs.rs | 1 - magic.yml | 18 + rcublas/cublas-sys/build.rs | 9 +- rcublas/cublas-sys/src/generated.rs | 2 - rcublas/cublas/src/api/context.rs | 25 +- rcublas/cublas/src/api/enums.rs | 4 +- rcublas/cublas/src/api/level1.rs | 53 +- rcublas/cublas/src/api/level3.rs | 38 +- rcublas/cublas/src/api/util.rs | 72 +- rcublas/cublas/src/chore.rs | 35 +- rcublas/cublas/src/error.rs | 4 +- rcublas/cublas/src/lib.rs | 2 +- rcudnn/cudnn-sys/build.rs | 5 +- rcudnn/cudnn-sys/src/generated.rs | 2 - rcudnn/cudnn-sys/src/lib.rs | 1 - rcudnn/cudnn/benches/cudnn_overhead.rs | 1 - rcudnn/cudnn/src/api/activation.rs | 15 +- rcudnn/cudnn/src/api/convolution.rs | 25 +- rcudnn/cudnn/src/api/cuda.rs | 25 +- rcudnn/cudnn/src/api/dropout.rs | 25 +- rcudnn/cudnn/src/api/normalization.rs | 15 +- rcudnn/cudnn/src/api/pooling.rs | 30 +- rcudnn/cudnn/src/api/rnn.rs | 189 +- rcudnn/cudnn/src/api/tensor.rs | 32 +- rcudnn/cudnn/src/api/utils.rs | 11 +- rcudnn/cudnn/src/cuda.rs | 10 +- rcudnn/cudnn/src/cudnn.rs | 47 +- rcudnn/cudnn/src/dropout_descriptor.rs | 4 +- rcudnn/cudnn/src/lib.rs | 4 +- rcudnn/cudnn/src/rnn_descriptor.rs | 33 +- rcudnn/cudnn/src/tensor_descriptor.rs | 5 +- rcudnn/cudnn/src/utils.rs | 16 +- rcudnn/cudnn/tests/cudnn_specs.rs | 10 +- rust-blas/src/math/mod.rs | 2 +- 119 files changed, 4974 insertions(+), 3683 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 magic.yml diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..4ba3dad97 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,19 @@ +{ + "workbench.colorCustomizations": { + "activityBar.activeBackground": "#19c1ab", + "activityBar.activeBorder": "#af1ac6", + "activityBar.background": "#19c1ab", + "activityBar.foreground": "#15202b", + "activityBar.inactiveForeground": "#15202b99", + "activityBarBadge.background": "#af1ac6", + "activityBarBadge.foreground": "#e7e7e7", + "statusBar.background": "#139483", + "statusBar.foreground": "#e7e7e7", + "statusBarItem.hoverBackground": "#19c1ab", + "titleBar.activeBackground": "#139483", + "titleBar.activeForeground": "#e7e7e7", + "titleBar.inactiveBackground": "#13948399", + "titleBar.inactiveForeground": "#e7e7e799" + }, + "peacock.remoteColor": "#139483" +} \ No newline at end of file diff --git a/coaster-blas/src/frameworks/cuda/helper.rs b/coaster-blas/src/frameworks/cuda/helper.rs index 5dec70d19..110c82521 100644 --- a/coaster-blas/src/frameworks/cuda/helper.rs +++ b/coaster-blas/src/frameworks/cuda/helper.rs @@ -56,7 +56,7 @@ macro_rules! iblas_asum_for_cuda { let x_mem = read!(x, self); let r_mem = write_only!(result, self); - let ctx : &cublas::Context = self.framework().cublas(); + let ctx: &cublas::Context = self.framework().cublas(); exec!( asum, (*ctx).asum(trans!(x_mem, $t), trans!(r_mem, $t), n, None) @@ -131,7 +131,7 @@ macro_rules! iblas_nrm2_for_cuda { let x_mem = read!(x, self); let r_mem = write_only!(result, self); - let ctx : &cublas::Context = self.framework().cublas(); + let ctx: &cublas::Context = self.framework().cublas(); exec!( nrm2, @@ -155,7 +155,7 @@ macro_rules! 
iblas_dot_for_cuda { let x_mem = read!(x, self); let y_mem = read!(y, self); let r_mem = write_only!(result, self); - let ctx : &cublas::Context = self.framework().cublas(); + let ctx: &cublas::Context = self.framework().cublas(); exec!( dot, (*ctx).dot( @@ -183,7 +183,7 @@ macro_rules! iblas_scal_for_cuda { let n = x.desc().size() as i32; let a_mem = read!(a, self); let x_mem = read_write!(x, self); - let ctx : &cublas::Context = self.framework().cublas(); + let ctx: &cublas::Context = self.framework().cublas(); exec!( scal, @@ -205,7 +205,7 @@ macro_rules! iblas_swap_for_cuda { let n = x.desc().size() as i32; let x_mem = read_write!(x, self); let y_mem = read_write!(y, self); - let ctx : &cublas::Context = self.framework().cublas(); + let ctx: &cublas::Context = self.framework().cublas(); exec!( swap, @@ -273,7 +273,7 @@ macro_rules! iblas_gemm_for_cuda { let ldb = b_1; let ldc = c_1; - let ctx : &cublas::Context = self.framework().cublas(); + let ctx: &cublas::Context = self.framework().cublas(); exec!( gemm, diff --git a/coaster-blas/tests/blas_specs.rs b/coaster-blas/tests/blas_specs.rs index 28da9f8cd..cebfab48a 100644 --- a/coaster-blas/tests/blas_specs.rs +++ b/coaster-blas/tests/blas_specs.rs @@ -354,7 +354,6 @@ macro_rules! test_blas { test_nrm2::<$t, _>($backend_getter()); } - #[test] fn it_computes_correct_scal() { test_scal::<$t, _>($backend_getter()); diff --git a/coaster-nn/src/frameworks/cuda/helper.rs b/coaster-nn/src/frameworks/cuda/helper.rs index 5e01b318a..8716c0b4e 100644 --- a/coaster-nn/src/frameworks/cuda/helper.rs +++ b/coaster-nn/src/frameworks/cuda/helper.rs @@ -1,33 +1,33 @@ //! Provides useful macros for easier NN implementation for CUDA/cuDNN. macro_rules! read { - ($x:ident, $slf:ident) => ( + ($x:ident, $slf:ident) => { $x.read($slf.device()).unwrap() - ) + }; } macro_rules! read_write { - ($x:ident, $slf:ident) => ( + ($x:ident, $slf:ident) => { $x.read_write($slf.device()).unwrap() - ) + }; } macro_rules! write_only { - ($x:ident, $slf:ident) => ( + ($x:ident, $slf:ident) => { $x.write_only($slf.device()).unwrap() - ) + }; } // trans! cannot be inlined into macros above, because `$mem` would become // intermediate variable and `*mut $t` will outlive it. macro_rules! trans { - ($mem:ident) => ( + ($mem:ident) => { unsafe { ::std::mem::transmute::<u64, *const ::libc::c_void>(*$mem.id_c()) } - ) + }; } macro_rules! trans_mut { - ($mem:ident) => ( + ($mem:ident) => { unsafe { ::std::mem::transmute::<u64, *mut ::libc::c_void>(*$mem.id_c()) } - ) + }; } diff --git a/coaster-nn/src/frameworks/cuda/mod.rs b/coaster-nn/src/frameworks/cuda/mod.rs index 729e8344b..04456f6a3 100644 --- a/coaster-nn/src/frameworks/cuda/mod.rs +++ b/coaster-nn/src/frameworks/cuda/mod.rs @@ -1,24 +1,25 @@ //! Provides NN for a CUDA backend.
#![allow(missing_docs)] -use crate::co::Error; -use crate::co::plugin::Error as PluginError; use crate::co::plugin::numeric_helpers::Float; +use crate::co::plugin::Error as PluginError; use crate::co::prelude::*; -use crate::cudnn::*; +use crate::co::Error; pub use crate::cudnn::utils::{DataType, DataTypeInfo}; +use crate::cudnn::*; use crate::plugin::*; #[macro_use] pub mod helper; -fn rnn_sequence_descriptors(sequence_length: i32, - input_size: i32, - hidden_size: i32, - batch_size: i32, - num_layers: i32, - data_type: DataType) - -> Result<RnnSequenceDescriptors, Error> { +fn rnn_sequence_descriptors( + sequence_length: i32, + input_size: i32, + hidden_size: i32, + batch_size: i32, + num_layers: i32, + data_type: DataType, +) -> Result<RnnSequenceDescriptors, Error> { let mut x_desc: Vec<TensorDescriptor> = Vec::with_capacity(sequence_length as usize); let mut y_desc: Vec<TensorDescriptor> = Vec::with_capacity(sequence_length as usize); let mut dxdesc: Vec<TensorDescriptor> = Vec::with_capacity(sequence_length as usize); let mut dydesc: Vec<TensorDescriptor> = Vec::with_capacity(sequence_length as usize); @@ -29,29 +30,17 @@ fn rnn_sequence_descriptors(sequence_length: i32, let dim_hidden_cell = vec![num_layers, batch_size, hidden_size]; let stride_input = vec![dim_input[2] * dim_input[1], dim_input[2], 1]; let stride_output = vec![dim_output[2] * dim_output[1], dim_output[2], 1]; - let stride_hidden_cell = vec![dim_hidden_cell[2] * dim_hidden_cell[1], dim_hidden_cell[2], 1]; + let stride_hidden_cell = vec![ + dim_hidden_cell[2] * dim_hidden_cell[1], + dim_hidden_cell[2], + 1, + ]; // FIXME: Ensure hidden_size*2 is used for bidirectional models for _ in 0..sequence_length { - x_desc.push(TensorDescriptor::new( - &dim_input, - &stride_input, - data_type, - ).unwrap()); - dxdesc.push(TensorDescriptor::new( - &dim_input, - &stride_input, - data_type, - ).unwrap()); - y_desc.push(TensorDescriptor::new( - &dim_output, - &stride_output, - data_type, - ).unwrap()); - dydesc.push(TensorDescriptor::new( - &dim_output, - &stride_output, - data_type, - ).unwrap()); + x_desc.push(TensorDescriptor::new(&dim_input, &stride_input, data_type).unwrap()); + dxdesc.push(TensorDescriptor::new(&dim_input, &stride_input, data_type).unwrap()); + y_desc.push(TensorDescriptor::new(&dim_output, &stride_output, data_type).unwrap()); + dydesc.push(TensorDescriptor::new(&dim_output, &stride_output, data_type).unwrap()); } Ok(RnnSequenceDescriptors { @@ -59,46 +48,14 @@ fn rnn_sequence_descriptors(sequence_length: i32, y_desc, dx_desc: dxdesc, dy_desc: dydesc, - hx_desc: TensorDescriptor::new( - &dim_hidden_cell, - &stride_hidden_cell, - data_type, - ).unwrap(), - hy_desc: TensorDescriptor::new( - &dim_hidden_cell, - &stride_hidden_cell, - data_type, - ).unwrap(), - cx_desc: TensorDescriptor::new( - &dim_hidden_cell, - &stride_hidden_cell, - data_type, - ).unwrap(), - cy_desc: TensorDescriptor::new( - &dim_hidden_cell, - &stride_hidden_cell, - data_type, - ).unwrap(), - dhx_desc: TensorDescriptor::new( - &dim_hidden_cell, - &stride_hidden_cell, - data_type, - ).unwrap(), - dhy_desc: TensorDescriptor::new( - &dim_hidden_cell, - &stride_hidden_cell, - data_type, - ).unwrap(), - dcx_desc: TensorDescriptor::new( - &dim_hidden_cell, - &stride_hidden_cell, - data_type, - ).unwrap(), - dcy_desc: TensorDescriptor::new( - &dim_hidden_cell, - &stride_hidden_cell, - data_type, - ).unwrap(), + hx_desc: TensorDescriptor::new(&dim_hidden_cell, &stride_hidden_cell, data_type).unwrap(), + hy_desc: TensorDescriptor::new(&dim_hidden_cell, &stride_hidden_cell, data_type).unwrap(), + cx_desc: TensorDescriptor::new(&dim_hidden_cell, &stride_hidden_cell, data_type).unwrap(), + cy_desc: TensorDescriptor::new(&dim_hidden_cell,
&stride_hidden_cell, data_type).unwrap(), + dhx_desc: TensorDescriptor::new(&dim_hidden_cell, &stride_hidden_cell, data_type).unwrap(), + dhy_desc: TensorDescriptor::new(&dim_hidden_cell, &stride_hidden_cell, data_type).unwrap(), + dcx_desc: TensorDescriptor::new(&dim_hidden_cell, &stride_hidden_cell, data_type).unwrap(), + dcy_desc: TensorDescriptor::new(&dim_hidden_cell, &stride_hidden_cell, data_type).unwrap(), }) } @@ -116,31 +73,36 @@ pub trait ICudnnDesc { fn cudnn_filter_desc(&self) -> Result; - fn cudnn_convolution_desc(&self, - filter: &SharedTensor) - -> Result; - - fn cudnn_rnn_desc(&self, - cudnn_framework: &Cudnn, - hidden_size: i32, - num_layers: i32, - dropout_desc: utils::DropoutConfig, - input_mode: cudnnRNNInputMode_t, - direction: cudnnDirectionMode_t, - mode: cudnnRNNMode_t, - algorithm: cudnnRNNAlgo_t, - padding_mode: cudnnRNNPaddingMode_t) -> Result; + fn cudnn_convolution_desc( + &self, + filter: &SharedTensor, + ) -> Result; + + fn cudnn_rnn_desc( + &self, + cudnn_framework: &Cudnn, + hidden_size: i32, + num_layers: i32, + dropout_desc: utils::DropoutConfig, + input_mode: cudnnRNNInputMode_t, + direction: cudnnDirectionMode_t, + mode: cudnnRNNMode_t, + algorithm: cudnnRNNAlgo_t, + padding_mode: cudnnRNNPaddingMode_t, + ) -> Result; } impl ConvForwardAlgo { /// Tries to return the matching cuDNN type for the enum value. fn as_cudnn(&self) -> Result { - use crate::ConvForwardAlgo::*; use crate::cudnn::cudnnConvolutionFwdAlgo_t::*; + use crate::ConvForwardAlgo::*; Ok(match *self { Auto => { - return Err(Error::Plugin(PluginError::Plugin("Can't create cuDNN convolution forward algorithm from \ - ConvForwardAlgo::Auto. Use `find_cudnn_algo` to find an algorithm."))) + return Err(Error::Plugin(PluginError::Plugin( + "Can't create cuDNN convolution forward algorithm from \ + ConvForwardAlgo::Auto. Use `find_cudnn_algo` to find an algorithm.", + ))) } GEMM => CUDNN_CONVOLUTION_FWD_ALGO_GEMM, ImplicitGEMM => CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM, @@ -155,8 +117,8 @@ impl ConvForwardAlgo { /// Returns the matching enum value for a cuDNN algo. fn from_cudnn(algo: &cudnnConvolutionFwdAlgo_t) -> ConvForwardAlgo { - use crate::ConvForwardAlgo::*; use crate::cudnn::cudnnConvolutionFwdAlgo_t::*; + use crate::ConvForwardAlgo::*; match *algo { CUDNN_CONVOLUTION_FWD_ALGO_GEMM => GEMM, CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM => ImplicitGEMM, @@ -171,24 +133,31 @@ impl ConvForwardAlgo { } /// Try to find best algorithm for a operation that uses the provided descriptors. 
- fn find_cudnn_algo(&self, - cudnn_framework: &Cudnn, - filter_desc: &FilterDescriptor, - conv_desc: &ConvolutionDescriptor, - src_desc: &TensorDescriptor, - dest_desc: &TensorDescriptor) - -> Result { + fn find_cudnn_algo( + &self, + cudnn_framework: &Cudnn, + filter_desc: &FilterDescriptor, + conv_desc: &ConvolutionDescriptor, + src_desc: &TensorDescriptor, + dest_desc: &TensorDescriptor, + ) -> Result { if !self.is_auto() { return Ok(*self); } - let algos = API::find_convolution_forward_algorithm(*cudnn_framework.id_c(), - *filter_desc.id_c(), - *conv_desc.id_c(), - *src_desc.id_c(), - *dest_desc.id_c()) - .unwrap(); + let algos = API::find_convolution_forward_algorithm( + *cudnn_framework.id_c(), + *filter_desc.id_c(), + *conv_desc.id_c(), + *src_desc.id_c(), + *dest_desc.id_c(), + ) + .unwrap(); let algo = match algos.len() { - 0 => return Err(Error::Plugin(PluginError::Operation("Unable to find CUDA cuDNN convolution forward algorithm."))), + 0 => { + return Err(Error::Plugin(PluginError::Operation( + "Unable to find CUDA cuDNN convolution forward algorithm.", + ))) + } _ => algos[0].algo, }; Ok(ConvForwardAlgo::from_cudnn(&algo)) @@ -198,13 +167,15 @@ impl ConvForwardAlgo { impl ConvBackwardFilterAlgo { /// Tries to return the matching cuDNN type for the enum value. fn as_cudnn(&self) -> Result { - use crate::ConvBackwardFilterAlgo::*; use crate::cudnn::cudnnConvolutionBwdFilterAlgo_t::*; + use crate::ConvBackwardFilterAlgo::*; Ok(match *self { Auto => { - return Err(Error::Plugin(PluginError::Plugin("Can't create cuDNN convolution backward filter algorithm from \ + return Err(Error::Plugin(PluginError::Plugin( + "Can't create cuDNN convolution backward filter algorithm from \ ConvBackwardFilterAlgo::Auto. Use `find_cudnn_algo` to find an \ - algorithm."))) + algorithm.", + ))) } ImplicitGEMM => CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1, ImplicitGEMMSum => CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0, @@ -216,8 +187,8 @@ impl ConvBackwardFilterAlgo { /// Returns the matching enum value for a cuDNN algo. fn from_cudnn(algo: &cudnnConvolutionBwdFilterAlgo_t) -> ConvBackwardFilterAlgo { - use crate::ConvBackwardFilterAlgo::*; use crate::cudnn::cudnnConvolutionBwdFilterAlgo_t::*; + use crate::ConvBackwardFilterAlgo::*; match *algo { CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 => ImplicitGEMMSum, CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 => ImplicitGEMM, @@ -229,24 +200,31 @@ impl ConvBackwardFilterAlgo { } /// Try to find best algorithm for a operation that uses the provided descriptors. 
- fn find_cudnn_algo(&self, - cudnn_framework: &Cudnn, - filter_desc: &FilterDescriptor, - conv_desc: &ConvolutionDescriptor, - src_desc: &TensorDescriptor, - dest_desc: &TensorDescriptor) - -> Result { + fn find_cudnn_algo( + &self, + cudnn_framework: &Cudnn, + filter_desc: &FilterDescriptor, + conv_desc: &ConvolutionDescriptor, + src_desc: &TensorDescriptor, + dest_desc: &TensorDescriptor, + ) -> Result { if !self.is_auto() { return Ok(*self); } - let algos = API::find_convolution_backward_filter_algorithm(*cudnn_framework.id_c(), - *filter_desc.id_c(), - *conv_desc.id_c(), - *src_desc.id_c(), - *dest_desc.id_c()) - .unwrap(); + let algos = API::find_convolution_backward_filter_algorithm( + *cudnn_framework.id_c(), + *filter_desc.id_c(), + *conv_desc.id_c(), + *src_desc.id_c(), + *dest_desc.id_c(), + ) + .unwrap(); let algo = match algos.len() { - 0 => return Err(Error::Plugin(PluginError::Operation("Unable to find CUDA cuDNN convolution backward filter algorithm."))), + 0 => { + return Err(Error::Plugin(PluginError::Operation( + "Unable to find CUDA cuDNN convolution backward filter algorithm.", + ))) + } _ => algos[0].algo, }; Ok(ConvBackwardFilterAlgo::from_cudnn(&algo)) @@ -256,13 +234,15 @@ impl ConvBackwardFilterAlgo { impl ConvBackwardDataAlgo { /// Tries to return the matching cuDNN type for the enum value. fn as_cudnn(&self) -> Result { - use crate::ConvBackwardDataAlgo::*; use crate::cudnn::cudnnConvolutionBwdDataAlgo_t::*; + use crate::ConvBackwardDataAlgo::*; Ok(match *self { Auto => { - return Err(Error::Plugin(PluginError::Plugin("Can't create cuDNN convolution backward data algorithm from \ + return Err(Error::Plugin(PluginError::Plugin( + "Can't create cuDNN convolution backward data algorithm from \ ConvBackwardDataAlgo::Auto. Use `find_cudnn_algo` to find \ - an algorithm."))) + an algorithm.", + ))) } ImplicitGEMM => CUDNN_CONVOLUTION_BWD_DATA_ALGO_1, ImplicitGEMMSum => CUDNN_CONVOLUTION_BWD_DATA_ALGO_0, @@ -275,8 +255,8 @@ impl ConvBackwardDataAlgo { /// Returns the matching enum value for a cuDNN algo. fn from_cudnn(algo: &cudnnConvolutionBwdDataAlgo_t) -> ConvBackwardDataAlgo { - use crate::ConvBackwardDataAlgo::*; use crate::cudnn::cudnnConvolutionBwdDataAlgo_t::*; + use crate::ConvBackwardDataAlgo::*; match *algo { CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 => ImplicitGEMMSum, CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 => ImplicitGEMM, @@ -289,25 +269,32 @@ impl ConvBackwardDataAlgo { } /// Try to find best algorithm for a operation that uses the provided descriptors. 
- fn find_cudnn_algo(&self, - cudnn_framework: &Cudnn, - filter_desc: &FilterDescriptor, - conv_desc: &ConvolutionDescriptor, - src_desc: &TensorDescriptor, - dest_desc: &TensorDescriptor) - -> Result { + fn find_cudnn_algo( + &self, + cudnn_framework: &Cudnn, + filter_desc: &FilterDescriptor, + conv_desc: &ConvolutionDescriptor, + src_desc: &TensorDescriptor, + dest_desc: &TensorDescriptor, + ) -> Result { if !self.is_auto() { return Ok(*self); } - let algos = API::find_convolution_backward_data_algorithm(*cudnn_framework.id_c(), - *filter_desc.id_c(), - *conv_desc.id_c(), - *src_desc.id_c(), - *dest_desc.id_c()) - .unwrap(); + let algos = API::find_convolution_backward_data_algorithm( + *cudnn_framework.id_c(), + *filter_desc.id_c(), + *conv_desc.id_c(), + *src_desc.id_c(), + *dest_desc.id_c(), + ) + .unwrap(); let algo = match algos.len() { - 0 => return Err(Error::Plugin(PluginError::Operation("Unable to find CUDA cuDNN convolution backward data algorithm."))), + 0 => { + return Err(Error::Plugin(PluginError::Operation( + "Unable to find CUDA cuDNN convolution backward data algorithm.", + ))) + } _ => algos[0].algo, }; Ok(ConvBackwardDataAlgo::from_cudnn(&algo)) @@ -315,14 +302,19 @@ impl ConvBackwardDataAlgo { } impl ICudnnDesc for SharedTensor - where T: Float + DataTypeInfo +where + T: Float + DataTypeInfo, { fn cudnn_tensor_desc(&self) -> Result { - match TensorDescriptor::new(&self.desc().dims_i32().clone(), - &self.desc().default_stride_i32().clone(), - ::cudnn_data_type()) { + match TensorDescriptor::new( + &self.desc().dims_i32().clone(), + &self.desc().default_stride_i32().clone(), + ::cudnn_data_type(), + ) { Ok(desc) => Ok(desc), - Err(_) => Err(PluginError::Plugin("Unable to create CuDNN TensorDescriptor.")), + Err(_) => Err(PluginError::Plugin( + "Unable to create CuDNN TensorDescriptor.", + )), } } @@ -337,11 +329,15 @@ impl ICudnnDesc for SharedTensor 3 => vec![1, actual_desc[0], actual_desc[1], actual_desc[2]], _ => actual_desc, }; - match TensorDescriptor::new(&override_desc.dims_i32().clone(), - &override_desc.default_stride_i32().clone(), - ::cudnn_data_type()) { + match TensorDescriptor::new( + &override_desc.dims_i32().clone(), + &override_desc.default_stride_i32().clone(), + ::cudnn_data_type(), + ) { Ok(desc) => Ok(desc), - Err(_) => Err(PluginError::Plugin("Unable to create CuDNN TensorDescriptor.")), + Err(_) => Err(PluginError::Plugin( + "Unable to create CuDNN TensorDescriptor.", + )), } } @@ -355,19 +351,27 @@ impl ICudnnDesc for SharedTensor for dim in actual_desc { override_desc.push(dim); } - match TensorDescriptor::new(&override_desc.dims_i32().clone(), - &override_desc.default_stride_i32().clone(), - ::cudnn_data_type()) { + match TensorDescriptor::new( + &override_desc.dims_i32().clone(), + &override_desc.default_stride_i32().clone(), + ::cudnn_data_type(), + ) { Ok(desc) => Ok(desc), - Err(_) => Err(PluginError::Plugin("Unable to create CuDNN TensorDescriptor.")), + Err(_) => Err(PluginError::Plugin( + "Unable to create CuDNN TensorDescriptor.", + )), } } fn cudnn_filter_desc(&self) -> Result { - match FilterDescriptor::new(&self.desc().dims_i32().clone(), - ::cudnn_data_type()) { + match FilterDescriptor::new( + &self.desc().dims_i32().clone(), + ::cudnn_data_type(), + ) { Ok(desc) => Ok(desc), - Err(_) => Err(PluginError::Plugin("Unable to create CuDNN FilterDescriptor.")), + Err(_) => Err(PluginError::Plugin( + "Unable to create CuDNN FilterDescriptor.", + )), } } @@ -376,14 +380,19 @@ impl ICudnnDesc for SharedTensor // unimplemented!() //} - fn 
cudnn_convolution_desc(&self, - filter: &SharedTensor) - -> Result { - match ConvolutionDescriptor::new(&self.desc().dims_i32().clone(), - &filter.desc().default_stride_i32().clone(), - ::cudnn_data_type()) { + fn cudnn_convolution_desc( + &self, + filter: &SharedTensor, + ) -> Result { + match ConvolutionDescriptor::new( + &self.desc().dims_i32().clone(), + &filter.desc().default_stride_i32().clone(), + ::cudnn_data_type(), + ) { Ok(desc) => Ok(desc), - Err(_) => Err(PluginError::Plugin("Unable to create CuDNN ConvolutionDescriptor.")), + Err(_) => Err(PluginError::Plugin( + "Unable to create CuDNN ConvolutionDescriptor.", + )), } } @@ -418,7 +427,8 @@ impl ICudnnDesc for SharedTensor } impl NN for Backend - where T: Float + DataTypeInfo +where + T: Float + DataTypeInfo, { type CC = utils::ConvolutionConfig; type CLRN = utils::NormalizationConfig; @@ -438,60 +448,70 @@ impl NNOperationConfig for utils::PoolingConfig where T: Float + DataTypeI impl NNOperationConfig for utils::DropoutConfig where T: Float + DataTypeInfo {} impl Sigmoid for Backend - where T: Float + DataTypeInfo + Default +where + T: Float + DataTypeInfo + Default, { - fn sigmoid(&self, - x: &SharedTensor, - result: &mut SharedTensor) - -> Result<(), Error> { - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + fn sigmoid(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), Error> { + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let cudnn_framework = self.framework().cudnn(); let r_desc = result.cudnn_tensor_desc_flat()?; let x_mem = read!(x, self); let r_mem = write_only!(result, self); - match cudnn_framework.sigmoid_forward(&cudnn_framework.init_activation().unwrap(), - &x.cudnn_tensor_desc_flat()?, - trans!(x_mem), - &r_desc, - trans_mut!(r_mem), - scal_params) { + match cudnn_framework.sigmoid_forward( + &cudnn_framework.init_activation().unwrap(), + &x.cudnn_tensor_desc_flat()?, + trans!(x_mem), + &r_desc, + trans_mut!(r_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN Activation Sigmoid Forward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN Activation Sigmoid Forward.", + ))), } } - fn sigmoid_grad(&self, - x: &SharedTensor, - x_diff: &SharedTensor, - result: &SharedTensor, - result_diff: &mut SharedTensor) - -> Result<(), Error> { + fn sigmoid_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let dr_desc = result_diff.cudnn_tensor_desc_flat()?; let x_mem = read!(x, self); let dx_mem = read!(x_diff, self); let r_mem = read!(result, self); let dr_mem = write_only!(result_diff, self); - match cudnn_framework.sigmoid_backward(&cudnn_framework.init_activation().unwrap(), - &x.cudnn_tensor_desc_flat()?, - trans!(x_mem), - &x_diff.cudnn_tensor_desc_flat()?, - trans!(dx_mem), - &result.cudnn_tensor_desc_flat()?, - trans!(r_mem), - &dr_desc, - trans_mut!(dr_mem), - scal_params) { + match cudnn_framework.sigmoid_backward( + &cudnn_framework.init_activation().unwrap(), + &x.cudnn_tensor_desc_flat()?, + trans!(x_mem), + &x_diff.cudnn_tensor_desc_flat()?, + 
trans!(dx_mem), + &result.cudnn_tensor_desc_flat()?, + trans!(r_mem), + &dr_desc, + trans_mut!(dr_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN Activation Sigmoid Backward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN Activation Sigmoid Backward.", + ))), } } } impl ConvolutionConfig for crate::cudnn::utils::ConvolutionConfig - where T: Float + DataTypeInfo +where + T: Float + DataTypeInfo, { fn workspace_size(&self) -> usize { self.largest_workspace_size() @@ -499,62 +519,80 @@ impl ConvolutionConfig for crate::cudnn::utils::ConvolutionConfig } impl Convolution for Backend - where T: Float + DataTypeInfo +where + T: Float + DataTypeInfo, { - fn new_convolution_config(&self, - src: &SharedTensor, - dest: &SharedTensor, - filter: &SharedTensor, - algo_fwd: ConvForwardAlgo, - algo_bwd_filter: ConvBackwardFilterAlgo, - algo_bwd_data: ConvBackwardDataAlgo, - stride: &[i32], - zero_padding: &[i32]) - -> Result { + fn new_convolution_config( + &self, + src: &SharedTensor, + dest: &SharedTensor, + filter: &SharedTensor, + algo_fwd: ConvForwardAlgo, + algo_bwd_filter: ConvBackwardFilterAlgo, + algo_bwd_data: ConvBackwardDataAlgo, + stride: &[i32], + zero_padding: &[i32], + ) -> Result { let cudnn_framework = self.framework().cudnn(); let src_desc = src.cudnn_tensor_desc()?; let dest_desc = dest.cudnn_tensor_desc()?; let filter_desc = filter.cudnn_filter_desc()?; - let conv_desc = crate::cudnn::ConvolutionDescriptor::new(zero_padding, - stride, - ::cudnn_data_type()) - .unwrap(); - - let useable_algo_fwd = - algo_fwd.find_cudnn_algo(cudnn_framework, &filter_desc, &conv_desc, &src_desc, &dest_desc)?; - let useable_algo_bwd_filter = - algo_bwd_filter.find_cudnn_algo(cudnn_framework, &filter_desc, &conv_desc, &src_desc, &dest_desc)?; - let useable_algo_bwd_data = - algo_bwd_data.find_cudnn_algo(cudnn_framework, &filter_desc, &conv_desc, &src_desc, &dest_desc)?; - - let mut workspace_size_fwd = - API::get_convolution_forward_workspace_size(*cudnn_framework.id_c(), - useable_algo_fwd.as_cudnn().unwrap(), - *filter_desc.id_c(), - *conv_desc.id_c(), - *src_desc.id_c(), - *dest_desc.id_c()) - .unwrap(); - let mut workspace_size_bwd_filter = - API::get_convolution_backward_filter_workspace_size(*cudnn_framework.id_c(), - useable_algo_bwd_filter - .as_cudnn() - .unwrap(), - *filter_desc.id_c(), - *conv_desc.id_c(), - *src_desc.id_c(), - *dest_desc.id_c()) - .unwrap(); - let mut workspace_size_bwd_data = - API::get_convolution_backward_data_workspace_size(*cudnn_framework.id_c(), - useable_algo_bwd_data - .as_cudnn() - .unwrap(), - *filter_desc.id_c(), - *conv_desc.id_c(), - *src_desc.id_c(), - *dest_desc.id_c()) - .unwrap(); + let conv_desc = crate::cudnn::ConvolutionDescriptor::new( + zero_padding, + stride, + ::cudnn_data_type(), + ) + .unwrap(); + + let useable_algo_fwd = algo_fwd.find_cudnn_algo( + cudnn_framework, + &filter_desc, + &conv_desc, + &src_desc, + &dest_desc, + )?; + let useable_algo_bwd_filter = algo_bwd_filter.find_cudnn_algo( + cudnn_framework, + &filter_desc, + &conv_desc, + &src_desc, + &dest_desc, + )?; + let useable_algo_bwd_data = algo_bwd_data.find_cudnn_algo( + cudnn_framework, + &filter_desc, + &conv_desc, + &src_desc, + &dest_desc, + )?; + + let mut workspace_size_fwd = API::get_convolution_forward_workspace_size( + *cudnn_framework.id_c(), + useable_algo_fwd.as_cudnn().unwrap(), + *filter_desc.id_c(), + *conv_desc.id_c(), + *src_desc.id_c(), + 
*dest_desc.id_c(), + ) + .unwrap(); + let mut workspace_size_bwd_filter = API::get_convolution_backward_filter_workspace_size( + *cudnn_framework.id_c(), + useable_algo_bwd_filter.as_cudnn().unwrap(), + *filter_desc.id_c(), + *conv_desc.id_c(), + *src_desc.id_c(), + *dest_desc.id_c(), + ) + .unwrap(); + let mut workspace_size_bwd_data = API::get_convolution_backward_data_workspace_size( + *cudnn_framework.id_c(), + useable_algo_bwd_data.as_cudnn().unwrap(), + *filter_desc.id_c(), + *conv_desc.id_c(), + *src_desc.id_c(), + *dest_desc.id_c(), + ) + .unwrap(); if workspace_size_fwd == 0 { workspace_size_fwd = 8; @@ -566,24 +604,28 @@ impl Convolution for Backend workspace_size_bwd_data = 8; } - Ok(crate::cudnn::utils::ConvolutionConfig::new(useable_algo_fwd.as_cudnn().unwrap(), - workspace_size_fwd, - useable_algo_bwd_filter.as_cudnn().unwrap(), - workspace_size_bwd_filter, - useable_algo_bwd_data.as_cudnn().unwrap(), - workspace_size_bwd_data, - conv_desc, - filter_desc)) - } - fn convolution(&self, - filter: &SharedTensor, - x: &SharedTensor, - result: &mut SharedTensor, - workspace: &mut SharedTensor, - config: &Self::CC) - -> Result<(), Error> { + Ok(crate::cudnn::utils::ConvolutionConfig::new( + useable_algo_fwd.as_cudnn().unwrap(), + workspace_size_fwd, + useable_algo_bwd_filter.as_cudnn().unwrap(), + workspace_size_bwd_filter, + useable_algo_bwd_data.as_cudnn().unwrap(), + workspace_size_bwd_data, + conv_desc, + filter_desc, + )) + } + fn convolution( + &self, + filter: &SharedTensor, + x: &SharedTensor, + result: &mut SharedTensor, + workspace: &mut SharedTensor, + config: &Self::CC, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let r_desc = result.cudnn_tensor_desc()?; let f_mem = read!(filter, self); @@ -591,84 +633,104 @@ impl Convolution for Backend let r_mem = write_only!(result, self); let w_mem = write_only!(workspace, self); - match cudnn_framework.convolution_forward(config, - trans_mut!(w_mem), - trans!(f_mem), - &x.cudnn_tensor_desc()?, // src_desc - trans!(x_mem), - &r_desc, - trans_mut!(r_mem), - scal_params) { + match cudnn_framework.convolution_forward( + config, + trans_mut!(w_mem), + trans!(f_mem), + &x.cudnn_tensor_desc()?, // src_desc + trans!(x_mem), + &r_desc, + trans_mut!(r_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN Activation convolution Forward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN Activation convolution Forward.", + ))), } } - fn convolution_grad_filter(&self, - src_data: &SharedTensor, - dest_diff: &SharedTensor, - filter_diff: &mut SharedTensor, - workspace: &mut SharedTensor, - config: &Self::CC) - -> Result<(), Error> { + fn convolution_grad_filter( + &self, + src_data: &SharedTensor, + dest_diff: &SharedTensor, + filter_diff: &mut SharedTensor, + workspace: &mut SharedTensor, + config: &Self::CC, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let s_mem = read!(src_data, self); let dd_mem = read!(dest_diff, self); let df_mem = write_only!(filter_diff, self); let w_mem = 
write_only!(workspace, self); - match cudnn_framework.convolution_backward_filter(config, - trans_mut!(w_mem), - &src_data.cudnn_tensor_desc()?, - trans!(s_mem), - &dest_diff.cudnn_tensor_desc()?, - trans!(dd_mem), - trans_mut!(df_mem), - scal_params) { + match cudnn_framework.convolution_backward_filter( + config, + trans_mut!(w_mem), + &src_data.cudnn_tensor_desc()?, + trans!(s_mem), + &dest_diff.cudnn_tensor_desc()?, + trans!(dd_mem), + trans_mut!(df_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN Activation convolution Backward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN Activation convolution Backward.", + ))), } } - fn convolution_grad_data(&self, - filter: &SharedTensor, - x_diff: &SharedTensor, - result_diff: &mut SharedTensor, - workspace: &mut SharedTensor, - config: &Self::CC) - -> Result<(), Error> { + fn convolution_grad_data( + &self, + filter: &SharedTensor, + x_diff: &SharedTensor, + result_diff: &mut SharedTensor, + workspace: &mut SharedTensor, + config: &Self::CC, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let dr_desc = result_diff.cudnn_tensor_desc()?; let f_mem = read!(filter, self); let dx_mem = read!(x_diff, self); let dr_mem = write_only!(result_diff, self); let w_mem = write_only!(workspace, self); - match cudnn_framework.convolution_backward_data(config, - trans_mut!(w_mem), - trans!(f_mem), - &x_diff.cudnn_tensor_desc()?, - trans!(dx_mem), - &dr_desc, - trans_mut!(dr_mem), - scal_params) { + match cudnn_framework.convolution_backward_data( + config, + trans_mut!(w_mem), + trans!(f_mem), + &x_diff.cudnn_tensor_desc()?, + trans!(dx_mem), + &dr_desc, + trans_mut!(dr_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Operation("Unable to execute CUDA cuDNN Activation convolution Backward."))), + Err(_) => Err(Error::Plugin(PluginError::Operation( + "Unable to execute CUDA cuDNN Activation convolution Backward.", + ))), } } } -impl RnnConfig for crate::cudnn::utils::RnnConfig where T: Float + DataTypeInfo +impl RnnConfig for crate::cudnn::utils::RnnConfig +where + T: Float + DataTypeInfo, { - fn workspace_size(&self) -> usize { self.largest_workspace_size() } + fn workspace_size(&self) -> usize { + self.largest_workspace_size() + } } impl RnnInputMode { fn as_cudnn(&self) -> Result { Ok(match self { RnnInputMode::LinearInput => cudnnRNNInputMode_t::CUDNN_LINEAR_INPUT, - RnnInputMode::SkipInput => cudnnRNNInputMode_t::CUDNN_SKIP_INPUT + RnnInputMode::SkipInput => cudnnRNNInputMode_t::CUDNN_SKIP_INPUT, }) } @@ -685,7 +747,7 @@ impl DirectionMode { fn as_cudnn(&self) -> Result { Ok(match self { DirectionMode::BiDirectional => cudnnDirectionMode_t::CUDNN_BIDIRECTIONAL, - DirectionMode::UniDirectional => cudnnDirectionMode_t::CUDNN_UNIDIRECTIONAL + DirectionMode::UniDirectional => cudnnDirectionMode_t::CUDNN_UNIDIRECTIONAL, }) } @@ -693,7 +755,7 @@ impl DirectionMode { match direction { cudnnDirectionMode_t::CUDNN_BIDIRECTIONAL => DirectionMode::BiDirectional, cudnnDirectionMode_t::CUDNN_UNIDIRECTIONAL => DirectionMode::UniDirectional, - _ => unreachable!() + _ => unreachable!(), } } } @@ -704,7 +766,7 @@ impl RnnNetworkMode { RnnNetworkMode::ReLU => cudnnRNNMode_t::CUDNN_RNN_RELU, 
RnnNetworkMode::Tanh => cudnnRNNMode_t::CUDNN_RNN_TANH, RnnNetworkMode::LSTM => cudnnRNNMode_t::CUDNN_LSTM, - RnnNetworkMode::GRU => cudnnRNNMode_t::CUDNN_GRU + RnnNetworkMode::GRU => cudnnRNNMode_t::CUDNN_GRU, }) } @@ -725,7 +787,7 @@ impl RnnAlgorithm { RnnAlgorithm::PersistDynamic => cudnnRNNAlgo_t::CUDNN_RNN_ALGO_PERSIST_DYNAMIC, RnnAlgorithm::PersistStatic => cudnnRNNAlgo_t::CUDNN_RNN_ALGO_PERSIST_STATIC, RnnAlgorithm::Standard => cudnnRNNAlgo_t::CUDNN_RNN_ALGO_STANDARD, - RnnAlgorithm::Count => cudnnRNNAlgo_t::CUDNN_RNN_ALGO_COUNT + RnnAlgorithm::Count => cudnnRNNAlgo_t::CUDNN_RNN_ALGO_COUNT, }) } @@ -745,7 +807,9 @@ impl MathType { match self { MathType::Default => Ok(cudnnMathType_t::CUDNN_DEFAULT_MATH), MathType::TensorOPMath => Ok(cudnnMathType_t::CUDNN_TENSOR_OP_MATH), - MathType::TensorOPMathAllowConversion => Ok(cudnnMathType_t::CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION) + MathType::TensorOPMathAllowConversion => { + Ok(cudnnMathType_t::CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION) + } } } @@ -753,7 +817,9 @@ impl MathType { match math_type { cudnnMathType_t::CUDNN_DEFAULT_MATH => MathType::Default, cudnnMathType_t::CUDNN_TENSOR_OP_MATH => MathType::TensorOPMath, - cudnnMathType_t::CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION => MathType::TensorOPMathAllowConversion, + cudnnMathType_t::CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION => { + MathType::TensorOPMathAllowConversion + } _ => unreachable!(), } } @@ -763,7 +829,7 @@ impl RnnPaddingMode { fn as_cudnn(&self) -> Result { match self { RnnPaddingMode::Enabled => Ok(CUDNN_RNN_PADDED_IO_ENABLED), - RnnPaddingMode::Disabled => Ok(CUDNN_RNN_PADDED_IO_DISABLED) + RnnPaddingMode::Disabled => Ok(CUDNN_RNN_PADDED_IO_DISABLED), } } @@ -808,12 +874,15 @@ pub struct RnnSequenceDescriptors { pub dcy_desc: TensorDescriptor, } -impl Rnn for Backend where T: Float + DataTypeInfo { +impl Rnn for Backend +where + T: Float + DataTypeInfo, +{ fn generate_rnn_weight_description( &self, rnn_config: &Self::CRNN, batch_size: i32, - input_size: i32 + input_size: i32, ) -> Result, Error> { let cudnn_framework = self.framework().cudnn(); let data_type = ::cudnn_data_type(); @@ -822,11 +891,8 @@ impl Rnn for Backend where T: Float + DataTypeInfo { let dim_input = vec![batch_size, input_size, 1]; let stride_input = vec![dim_input[2] * dim_input[1], dim_input[2], 1]; - let x_desc_single_iterator = TensorDescriptor::new( - &dim_input, - &stride_input, - data_type, - ).unwrap(); + let x_desc_single_iterator = + TensorDescriptor::new(&dim_input, &stride_input, data_type).unwrap(); let weight_size: usize = match API::get_rnn_params_size( *cudnn_framework.id_c(), @@ -834,9 +900,12 @@ impl Rnn for Backend where T: Float + DataTypeInfo { // Input. A fully packed tensor descriptor describing the input to one recurrent iteration. // Appears to be a single descriptor, not an array of tensor descriptors. *x_desc_single_iterator.id_c(), - data_type) { + data_type, + ) { Ok(size) => Ok(size), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to get CudNN Rnn Params Size."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to get CudNN Rnn Params Size.", + ))), }?; // TODO: Update for different sizing. 
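
// cudnnGetRNNParamsSize reports the parameter buffer size in *bytes*, so the
// `weight_size / 4` below assumes f32 weights. A type-generic sketch of the
// same computation would be:
//
//     let elems = weight_size / std::mem::size_of::<T>();
//     let dim_w = vec![elems, 1, 1];
//
// (sketch only; the hard-coded `/ 4` is what the patch keeps, per the TODO)
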
let dim_w: Vec = vec![weight_size / 4, 1, 1]; @@ -869,7 +938,9 @@ impl Rnn for Backend where T: Float + DataTypeInfo { dropout_seed.unwrap_or(0), ) { Ok(dropout_object) => Ok(dropout_object), - Err(_e) => Err(Error::Plugin(PluginError::Plugin("Unable to create Dropout Layer"))) + Err(_e) => Err(Error::Plugin(PluginError::Plugin( + "Unable to create Dropout Layer", + ))), }?; let dropout_memory: cudnnDropoutDescriptor_t = *drop_desc.dropout_desc().id_c(); @@ -881,7 +952,8 @@ impl Rnn for Backend where T: Float + DataTypeInfo { batch_size, num_layers, ::cudnn_data_type(), - )?.x_desc; + )? + .x_desc; let rnn_desc = match RnnDescriptor::new( &cudnn_framework, @@ -896,7 +968,7 @@ impl Rnn for Backend where T: Float + DataTypeInfo { (RnnPaddingMode::Disabled).as_cudnn().unwrap(), ) { Ok(desc) => desc, - Err(e) => panic!("Error {:?}", e) + Err(e) => panic!("Error {:?}", e), }; match cudnn_framework.init_rnn( @@ -914,7 +986,7 @@ impl Rnn for Backend where T: Float + DataTypeInfo { MathType::TensorOPMathAllowConversion.as_cudnn()?, ) { Ok(rnn_config) => Ok(rnn_config), - Err(e) => panic!("Error {:?}", e) + Err(e) => panic!("Error {:?}", e), } } @@ -966,19 +1038,22 @@ impl Rnn for Backend where T: Float + DataTypeInfo { *reserve.id_c(), ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to perform RNN Forward"))) + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to perform RNN Forward", + ))), } } - fn rnn_backward_data(&self, - src: &SharedTensor, - src_gradient: &mut SharedTensor, - output: &SharedTensor, - output_gradient: &SharedTensor, - rnn_config: &Self::CRNN, - weight: &SharedTensor, - workspace: &mut SharedTensor) - -> Result<(), Error> { + fn rnn_backward_data( + &self, + src: &SharedTensor, + src_gradient: &mut SharedTensor, + output: &SharedTensor, + output_gradient: &SharedTensor, + rnn_config: &Self::CRNN, + weight: &SharedTensor, + workspace: &mut SharedTensor, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); let src_dimensions = src.desc().clone(); let sequence_descriptors = rnn_sequence_descriptors( @@ -1027,17 +1102,20 @@ impl Rnn for Backend where T: Float + DataTypeInfo { *reserve_space.id_c(), ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Operation("Unable to execute CUDA cuDNN RNN Backward Data"))), + Err(_) => Err(Error::Plugin(PluginError::Operation( + "Unable to execute CUDA cuDNN RNN Backward Data", + ))), } } - fn rnn_backward_weights(&self, - src: &SharedTensor, - output: &SharedTensor, - filter: &mut SharedTensor, - rnn_config: &Self::CRNN, - workspace: &mut SharedTensor) - -> Result<(), Error> { + fn rnn_backward_weights( + &self, + src: &SharedTensor, + output: &SharedTensor, + filter: &mut SharedTensor, + rnn_config: &Self::CRNN, + workspace: &mut SharedTensor, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); let src_dimensions = src.desc().clone(); let sequence_descriptors = rnn_sequence_descriptors( @@ -1068,582 +1146,695 @@ impl Rnn for Backend where T: Float + DataTypeInfo { *reserve_space.id_c(), ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Operation("Unable to execute CUDA cuDNN RNN Backward Data"))), + Err(_) => Err(Error::Plugin(PluginError::Operation( + "Unable to execute CUDA cuDNN RNN Backward Data", + ))), } } } impl SigmoidPointwise for Backend - where T: Float + Default + DataTypeInfo +where + T: Float + Default + DataTypeInfo, { - fn sigmoid_pointwise(&self, - x: &mut SharedTensor) - -> Result<(), Error> { + fn 
sigmoid_pointwise(&self, x: &mut SharedTensor) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let x_desc = x.cudnn_tensor_desc_flat()?; let x_mem = read_write!(x, self); - match cudnn_framework.sigmoid_forward(&cudnn_framework.init_activation().unwrap(), - &x_desc, - trans!(x_mem), - &x_desc, - trans_mut!(x_mem), - scal_params) { + match cudnn_framework.sigmoid_forward( + &cudnn_framework.init_activation().unwrap(), + &x_desc, + trans!(x_mem), + &x_desc, + trans_mut!(x_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN Sigmoid Pointwise forward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN Sigmoid Pointwise forward.", + ))), } } - fn sigmoid_pointwise_grad(&self, - x: &SharedTensor, - x_diff: &mut SharedTensor) - -> Result<(), Error> { + fn sigmoid_pointwise_grad( + &self, + x: &SharedTensor, + x_diff: &mut SharedTensor, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let x_desc = x.cudnn_tensor_desc_flat()?; let dx_desc = x_diff.cudnn_tensor_desc_flat()?; let x_mem = read!(x, self); let dx_mem = read_write!(x_diff, self); // TODO move config one level up - match cudnn_framework.sigmoid_backward(&cudnn_framework.init_activation().unwrap(), - &x_desc, - trans!(x_mem), - &dx_desc, - trans!(dx_mem), - &x_desc, - trans!(x_mem), - &dx_desc, - trans_mut!(dx_mem), - scal_params) { + match cudnn_framework.sigmoid_backward( + &cudnn_framework.init_activation().unwrap(), + &x_desc, + trans!(x_mem), + &dx_desc, + trans!(dx_mem), + &x_desc, + trans!(x_mem), + &dx_desc, + trans_mut!(dx_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Operation("Unable to execute CUDA cuDNN Sigmoid Pointwise backward."))), + Err(_) => Err(Error::Plugin(PluginError::Operation( + "Unable to execute CUDA cuDNN Sigmoid Pointwise backward.", + ))), } } } impl Relu for Backend - where T: Float + Default + DataTypeInfo +where + T: Float + Default + DataTypeInfo, { - fn relu(&self, - x: &SharedTensor, - result: &mut SharedTensor) - -> Result<(), Error> { + fn relu(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let r_desc = result.cudnn_tensor_desc_flat()?; let x_mem = read!(x, self); let r_mem = write_only!(result, self); - match cudnn_framework.relu_forward(&cudnn_framework.init_activation().unwrap(), - &x.cudnn_tensor_desc_flat()?, - trans!(x_mem), - &r_desc, - trans_mut!(r_mem), - scal_params) { + match cudnn_framework.relu_forward( + &cudnn_framework.init_activation().unwrap(), + &x.cudnn_tensor_desc_flat()?, + trans!(x_mem), + &r_desc, + trans_mut!(r_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN Activation relu Forward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to 
execute CUDA cuDNN Activation relu Forward.", + ))), } } - fn relu_grad(&self, - x: &SharedTensor, - x_diff: &SharedTensor, - result: &SharedTensor, - result_diff: &mut SharedTensor) - -> Result<(), Error> { + fn relu_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let dr_desc = result_diff.cudnn_tensor_desc_flat()?; let x_mem = read!(x, self); let dx_mem = read!(x_diff, self); let r_mem = read!(result, self); let dr_mem = write_only!(result_diff, self); - match cudnn_framework.relu_backward(&cudnn_framework.init_activation().unwrap(), - &x.cudnn_tensor_desc_flat()?, - trans!(x_mem), - &x_diff.cudnn_tensor_desc_flat()?, - trans!(dx_mem), - &result.cudnn_tensor_desc_flat()?, - trans!(r_mem), - &dr_desc, - trans_mut!(dr_mem), - scal_params) { + match cudnn_framework.relu_backward( + &cudnn_framework.init_activation().unwrap(), + &x.cudnn_tensor_desc_flat()?, + trans!(x_mem), + &x_diff.cudnn_tensor_desc_flat()?, + trans!(dx_mem), + &result.cudnn_tensor_desc_flat()?, + trans!(r_mem), + &dr_desc, + trans_mut!(dr_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN Activation relu Backward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN Activation relu Backward.", + ))), } } } impl ReluPointwise for Backend - where T: Float + Default + DataTypeInfo +where + T: Float + Default + DataTypeInfo, { - fn relu_pointwise(&self, - x: &mut SharedTensor) - -> Result<(), Error> { + fn relu_pointwise(&self, x: &mut SharedTensor) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let x_desc = x.cudnn_tensor_desc_flat()?; let x_mem = read_write!(x, self); - match cudnn_framework.relu_forward(&cudnn_framework.init_activation().unwrap(), - &x_desc, - trans!(x_mem), - &x_desc, - trans_mut!(x_mem), - scal_params) { + match cudnn_framework.relu_forward( + &cudnn_framework.init_activation().unwrap(), + &x_desc, + trans!(x_mem), + &x_desc, + trans_mut!(x_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN ReLU Pointwise forward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN ReLU Pointwise forward.", + ))), } } - fn relu_pointwise_grad(&self, - x: &SharedTensor, - x_diff: &mut SharedTensor) - -> Result<(), Error> { + fn relu_pointwise_grad( + &self, + x: &SharedTensor, + x_diff: &mut SharedTensor, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let x_desc = x.cudnn_tensor_desc_flat()?; let dx_desc = x_diff.cudnn_tensor_desc_flat()?; let x_mem = read!(x, self); let dx_mem = read_write!(x_diff, self); - match cudnn_framework.relu_backward(&cudnn_framework.init_activation().unwrap(), - &x_desc, - trans!(x_mem), - &dx_desc, - 
trans!(dx_mem), - &x_desc, - trans!(x_mem), - &dx_desc, - trans_mut!(dx_mem), - scal_params) { + match cudnn_framework.relu_backward( + &cudnn_framework.init_activation().unwrap(), + &x_desc, + trans!(x_mem), + &dx_desc, + trans!(dx_mem), + &x_desc, + trans!(x_mem), + &dx_desc, + trans_mut!(dx_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN ReLU Pointwise backward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN ReLU Pointwise backward.", + ))), } } } impl Tanh for Backend - where T: Float + Default + DataTypeInfo +where + T: Float + Default + DataTypeInfo, { - fn tanh(&self, - x: &SharedTensor, - result: &mut SharedTensor) - -> Result<(), Error> { + fn tanh(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let r_desc = result.cudnn_tensor_desc_flat()?; let x_mem = read!(x, self); let r_mem = write_only!(result, self); - match cudnn_framework.tanh_forward(&cudnn_framework.init_activation().unwrap(), - &x.cudnn_tensor_desc_flat()?, - trans!(x_mem), - &r_desc, - trans_mut!(r_mem), - scal_params) { + match cudnn_framework.tanh_forward( + &cudnn_framework.init_activation().unwrap(), + &x.cudnn_tensor_desc_flat()?, + trans!(x_mem), + &r_desc, + trans_mut!(r_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN Activation tanh Forward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN Activation tanh Forward.", + ))), } } - fn tanh_grad(&self, - x: &SharedTensor, - x_diff: &SharedTensor, - result: &SharedTensor, - result_diff: &mut SharedTensor) - -> Result<(), Error> { + fn tanh_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let dr_desc = result_diff.cudnn_tensor_desc_flat()?; let x_mem = read!(x, self); let dx_mem = read!(x_diff, self); let r_mem = read!(result, self); let dr_mem = write_only!(result_diff, self); - match cudnn_framework.tanh_backward(&cudnn_framework.init_activation().unwrap(), - &x.cudnn_tensor_desc_flat()?, - trans!(x_mem), - &x_diff.cudnn_tensor_desc_flat()?, - trans!(dx_mem), - &result.cudnn_tensor_desc_flat()?, - trans!(r_mem), - &dr_desc, - trans_mut!(dr_mem), - scal_params) { + match cudnn_framework.tanh_backward( + &cudnn_framework.init_activation().unwrap(), + &x.cudnn_tensor_desc_flat()?, + trans!(x_mem), + &x_diff.cudnn_tensor_desc_flat()?, + trans!(dx_mem), + &result.cudnn_tensor_desc_flat()?, + trans!(r_mem), + &dr_desc, + trans_mut!(dr_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN Activation tanh Backward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN Activation tanh Backward.", + ))), } } } impl TanhPointwise for Backend - where T: Float + Default + DataTypeInfo +where + T: Float + Default + DataTypeInfo, { - fn 
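
// Note on the *_pointwise variants in this block: the same tensor descriptor
// is passed for both input and output (and the gradient buffer is read and
// then overwritten on the backward pass), so the cuDNN activation kernels run
// in place. Schematically, with a hypothetical `act` handle:
//
//     act.forward(&x_desc, x, &x_desc, x)?;            // y written over x
//     act.backward(&x_desc, x, &dx_desc, dx,
//                  &x_desc, x, &dx_desc, dx)?;         // dx overwritten
//
// (shape sketch only; the argument order follows the calls above)
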
tanh_pointwise(&self, - x: &mut SharedTensor) - -> Result<(), Error> { + fn tanh_pointwise(&self, x: &mut SharedTensor) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let x_desc = x.cudnn_tensor_desc_flat()?; let x_mem = read_write!(x, self); - match cudnn_framework.tanh_forward(&cudnn_framework.init_activation().unwrap(), - &x_desc, - trans!(x_mem), - &x_desc, - trans_mut!(x_mem), - scal_params) { + match cudnn_framework.tanh_forward( + &cudnn_framework.init_activation().unwrap(), + &x_desc, + trans!(x_mem), + &x_desc, + trans_mut!(x_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN Tanh Pointwise forward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN Tanh Pointwise forward.", + ))), } } - fn tanh_pointwise_grad(&self, - x: &SharedTensor, - x_diff: &mut SharedTensor) - -> Result<(), Error> { + fn tanh_pointwise_grad( + &self, + x: &SharedTensor, + x_diff: &mut SharedTensor, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let x_desc = x.cudnn_tensor_desc_flat()?; let dx_desc = x_diff.cudnn_tensor_desc_flat()?; let x_mem = read!(x, self); let dx_mem = read_write!(x_diff, self); - match cudnn_framework.tanh_backward(&cudnn_framework.init_activation().unwrap(), - &x_desc, - trans!(x_mem), - &dx_desc, - trans!(dx_mem), - &x_desc, - trans!(x_mem), - &dx_desc, - trans_mut!(dx_mem), - scal_params) { + match cudnn_framework.tanh_backward( + &cudnn_framework.init_activation().unwrap(), + &x_desc, + trans!(x_mem), + &dx_desc, + trans!(dx_mem), + &x_desc, + trans!(x_mem), + &dx_desc, + trans_mut!(dx_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN Tanh Pointwise backward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN Tanh Pointwise backward.", + ))), } } } impl Softmax for Backend - where T: Float + Default + DataTypeInfo +where + T: Float + Default + DataTypeInfo, { - fn softmax(&self, - x: &SharedTensor, - result: &mut SharedTensor) - -> Result<(), Error> { + fn softmax(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let r_desc = result.cudnn_tensor_desc_softmax()?; let x_mem = read!(x, self); let r_mem = write_only!(result, self); - match cudnn_framework.softmax_forward(&x.cudnn_tensor_desc_softmax()?, - trans!(x_mem), - &r_desc, - trans_mut!(r_mem), - scal_params) { + match cudnn_framework.softmax_forward( + &x.cudnn_tensor_desc_softmax()?, + trans!(x_mem), + &r_desc, + trans_mut!(r_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN softmax Forward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN softmax Forward.", + ))), } } - fn softmax_grad(&self, - x: 
&SharedTensor, - x_diff: &SharedTensor, - result_diff: &mut SharedTensor) - -> Result<(), Error> { + fn softmax_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result_diff: &mut SharedTensor, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let dr_desc = result_diff.cudnn_tensor_desc_softmax()?; let x_mem = read!(x, self); let dx_mem = read!(x_diff, self); let dr_mem = write_only!(result_diff, self); - match cudnn_framework.softmax_backward(&x.cudnn_tensor_desc_softmax()?, - trans!(x_mem), - &x_diff.cudnn_tensor_desc_softmax()?, - trans!(dx_mem), - &dr_desc, - trans_mut!(dr_mem), - scal_params) { + match cudnn_framework.softmax_backward( + &x.cudnn_tensor_desc_softmax()?, + trans!(x_mem), + &x_diff.cudnn_tensor_desc_softmax()?, + trans!(dx_mem), + &dr_desc, + trans_mut!(dr_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN softmax Backward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN softmax Backward.", + ))), } } } impl LogSoftmax for Backend - where T: Float + Default + DataTypeInfo +where + T: Float + Default + DataTypeInfo, { - fn log_softmax(&self, - x: &SharedTensor, - result: &mut SharedTensor) - -> Result<(), Error> { + fn log_softmax(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let r_desc = result.cudnn_tensor_desc_softmax()?; let x_mem = read!(x, self); let r_mem = write_only!(result, self); - match cudnn_framework.log_softmax_forward(&x.cudnn_tensor_desc_softmax()?, - trans!(x_mem), - &r_desc, - trans_mut!(r_mem), - scal_params) { + match cudnn_framework.log_softmax_forward( + &x.cudnn_tensor_desc_softmax()?, + trans!(x_mem), + &r_desc, + trans_mut!(r_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN softmax Forward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN softmax Forward.", + ))), } } - fn log_softmax_grad(&self, - x: &SharedTensor, - x_diff: &SharedTensor, - result_diff: &mut SharedTensor) - -> Result<(), Error> { + fn log_softmax_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result_diff: &mut SharedTensor, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let dr_desc = result_diff.cudnn_tensor_desc_softmax()?; let x_mem = read!(x, self); let dx_mem = read!(x_diff, self); let dr_mem = write_only!(result_diff, self); - match cudnn_framework.log_softmax_backward(&x.cudnn_tensor_desc_softmax()?, - trans!(x_mem), - &x_diff.cudnn_tensor_desc_softmax()?, - trans!(dx_mem), - &dr_desc, - trans_mut!(dr_mem), - scal_params) { + match cudnn_framework.log_softmax_backward( + &x.cudnn_tensor_desc_softmax()?, + trans!(x_mem), + &x_diff.cudnn_tensor_desc_softmax()?, + trans!(dx_mem), + &dr_desc, + trans_mut!(dr_mem), + 
scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN log softmax Backward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN log softmax Backward.", + ))), } } - } impl LRN for Backend - where T: Float + Default + DataTypeInfo +where + T: Float + Default + DataTypeInfo, { - fn new_lrn_config(&self, - n: u32, - alpha: f64, - beta: f64, - k: f64) - -> Result { + fn new_lrn_config(&self, n: u32, alpha: f64, beta: f64, k: f64) -> Result { let cudnn_framework = self.framework().cudnn(); - Ok(cudnn_framework.init_normalization(n, alpha, beta, k).unwrap()) + Ok(cudnn_framework + .init_normalization(n, alpha, beta, k) + .unwrap()) } - fn lrn(&self, - x: &SharedTensor, - result: &mut SharedTensor, - config: &Self::CLRN) - -> Result<(), Error> { + fn lrn( + &self, + x: &SharedTensor, + result: &mut SharedTensor, + config: &Self::CLRN, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let r_desc = result.cudnn_tensor_desc()?; let x_mem = read!(x, self); let r_mem = write_only!(result, self); - match cudnn_framework.lrn_forward(config, - &x.cudnn_tensor_desc()?, - trans!(x_mem), - &r_desc, - trans_mut!(r_mem), - scal_params) { + match cudnn_framework.lrn_forward( + config, + &x.cudnn_tensor_desc()?, + trans!(x_mem), + &r_desc, + trans_mut!(r_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN Activation lrn Forward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN Activation lrn Forward.", + ))), } } #[allow(unused_variables)] - fn lrn_grad(&self, - x: &SharedTensor, - x_diff: &SharedTensor, - result: &SharedTensor, - result_diff: &mut SharedTensor, - config: &Self::CLRN) - -> Result<(), Error> { + fn lrn_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + config: &Self::CLRN, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let dr_desc = result_diff.cudnn_tensor_desc()?; let x_mem = read!(x, self); let dx_mem = read!(x_diff, self); let r_mem = read!(result, self); let dr_mem = write_only!(result_diff, self); - match cudnn_framework.lrn_backward(config, - &x.cudnn_tensor_desc()?, - trans!(x_mem), - &x_diff.cudnn_tensor_desc()?, - trans!(dx_mem), - &result.cudnn_tensor_desc()?, - trans!(r_mem), - &dr_desc, - trans_mut!(dr_mem), - scal_params) { + match cudnn_framework.lrn_backward( + config, + &x.cudnn_tensor_desc()?, + trans!(x_mem), + &x_diff.cudnn_tensor_desc()?, + trans!(dx_mem), + &result.cudnn_tensor_desc()?, + trans!(r_mem), + &dr_desc, + trans_mut!(dr_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN Activation lrn Backward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN Activation lrn Backward.", + ))), } } } impl Pooling for Backend - where T: Float + Default + DataTypeInfo +where + T: Float + Default + DataTypeInfo, { - fn new_pooling_config(&self, - window: &[i32], - 
stride: &[i32], - padding: &[i32]) - -> Result { - let pooling_avg = crate::cudnn::PoolingDescriptor::new(crate::cudnn::cudnnPoolingMode_t::CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING, window, padding, stride).unwrap(); - let pooling_max = - crate::cudnn::PoolingDescriptor::new(crate::cudnn::cudnnPoolingMode_t::CUDNN_POOLING_MAX, - window, - padding, - stride) - .unwrap(); - Ok(crate::cudnn::utils::PoolingConfig::new(pooling_avg, pooling_max)) - } - - fn pooling_max(&self, - x: &SharedTensor, - result: &mut SharedTensor, - config: &Self::CPOOL) - -> Result<(), Error> { + fn new_pooling_config( + &self, + window: &[i32], + stride: &[i32], + padding: &[i32], + ) -> Result { + let pooling_avg = crate::cudnn::PoolingDescriptor::new( + crate::cudnn::cudnnPoolingMode_t::CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING, + window, + padding, + stride, + ) + .unwrap(); + let pooling_max = crate::cudnn::PoolingDescriptor::new( + crate::cudnn::cudnnPoolingMode_t::CUDNN_POOLING_MAX, + window, + padding, + stride, + ) + .unwrap(); + Ok(crate::cudnn::utils::PoolingConfig::new( + pooling_avg, + pooling_max, + )) + } + + fn pooling_max( + &self, + x: &SharedTensor, + result: &mut SharedTensor, + config: &Self::CPOOL, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let r_desc = result.cudnn_tensor_desc()?; let x_mem = read!(x, self); let r_mem = write_only!(result, self); - match cudnn_framework.pooling_max_forward(config, - &x.cudnn_tensor_desc()?, - trans!(x_mem), - &r_desc, - trans_mut!(r_mem), - scal_params) { + match cudnn_framework.pooling_max_forward( + config, + &x.cudnn_tensor_desc()?, + trans!(x_mem), + &r_desc, + trans_mut!(r_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN max pooling Forward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN max pooling Forward.", + ))), } } #[allow(unused_variables)] - fn pooling_max_grad(&self, - x: &SharedTensor, - x_diff: &SharedTensor, - result: &SharedTensor, - result_diff: &mut SharedTensor, - config: &Self::CPOOL) - -> Result<(), Error> { + fn pooling_max_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + config: &Self::CPOOL, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let dr_desc = result_diff.cudnn_tensor_desc()?; let x_mem = read!(x, self); let dx_mem = read!(x_diff, self); let r_mem = read!(result, self); let dr_mem = write_only!(result_diff, self); - match cudnn_framework.pooling_max_backward(config, - &x.cudnn_tensor_desc()?, - trans!(x_mem), - &x_diff.cudnn_tensor_desc()?, - trans!(dx_mem), - &result.cudnn_tensor_desc()?, - trans!(r_mem), - &dr_desc, - trans_mut!(dr_mem), - scal_params) { + match cudnn_framework.pooling_max_backward( + config, + &x.cudnn_tensor_desc()?, + trans!(x_mem), + &x_diff.cudnn_tensor_desc()?, + trans!(dx_mem), + &result.cudnn_tensor_desc()?, + trans!(r_mem), + &dr_desc, + trans_mut!(dr_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA 
cuDNN max pooling Backward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN max pooling Backward.", + ))), } } - fn pooling_avg(&self, - x: &SharedTensor, - result: &mut SharedTensor, - config: &Self::CPOOL) - -> Result<(), Error> { + fn pooling_avg( + &self, + x: &SharedTensor, + result: &mut SharedTensor, + config: &Self::CPOOL, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let r_desc = result.cudnn_tensor_desc()?; let x_mem = read!(x, self); let r_mem = write_only!(result, self); - match cudnn_framework.pooling_avg_forward(config, - &x.cudnn_tensor_desc()?, - trans!(x_mem), - &r_desc, - trans_mut!(r_mem), - scal_params) { + match cudnn_framework.pooling_avg_forward( + config, + &x.cudnn_tensor_desc()?, + trans!(x_mem), + &r_desc, + trans_mut!(r_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN avg pooling Forward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN avg pooling Forward.", + ))), } } #[allow(unused_variables)] - fn pooling_avg_grad(&self, - x: &SharedTensor, - x_diff: &SharedTensor, - result: &SharedTensor, - result_diff: &mut SharedTensor, - config: &Self::CPOOL) - -> Result<(), Error> { + fn pooling_avg_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + config: &Self::CPOOL, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); - let scal_params: crate::cudnn::utils::ScalParams = crate::cudnn::utils::ScalParams::default(); + let scal_params: crate::cudnn::utils::ScalParams = + crate::cudnn::utils::ScalParams::default(); let dr_desc = result_diff.cudnn_tensor_desc()?; let x_mem = read!(x, self); let dx_mem = read!(x_diff, self); let r_mem = read!(result, self); let dr_mem = write_only!(result_diff, self); - match cudnn_framework.pooling_avg_backward(config, - &x.cudnn_tensor_desc()?, - trans!(x_mem), - &x_diff.cudnn_tensor_desc()?, - trans!(dx_mem), - &result.cudnn_tensor_desc()?, - trans!(r_mem), - &dr_desc, - trans_mut!(dr_mem), - scal_params) { + match cudnn_framework.pooling_avg_backward( + config, + &x.cudnn_tensor_desc()?, + trans!(x_mem), + &x_diff.cudnn_tensor_desc()?, + trans!(dx_mem), + &result.cudnn_tensor_desc()?, + trans!(r_mem), + &dr_desc, + trans_mut!(dr_mem), + scal_params, + ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN avg pooling Backward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN avg pooling Backward.", + ))), } } } - - impl Dropout for Backend - where T: Float + Default + DataTypeInfo +where + T: Float + Default + DataTypeInfo, { - fn new_dropout_config(&self, - probability: f32, - seed: u64, - ) - -> Result { + fn new_dropout_config(&self, probability: f32, seed: u64) -> Result { let cudnn_framework = self.framework().cudnn(); Ok(cudnn_framework.init_dropout(probability, seed).unwrap()) } - fn dropout(&self, - x: &SharedTensor, - result: &mut SharedTensor, - config: &Self::CDROP) - -> Result<(), Error> { + fn dropout( + &self, + x: &SharedTensor, + result: &mut SharedTensor, + config: &Self::CDROP, + ) -> Result<(), Error> { let cudnn_framework = self.framework().cudnn(); let r_desc = 
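
// cuDNN dropout is of the "inverted" kind: at training time the surviving
// activations are scaled by 1/(1-p), so inference needs no rescaling. A CPU
// sketch of the assumed semantics:
fn inverted_dropout_sketch(x: &[f32], keep: &[bool], p: f32, out: &mut [f32]) {
    let scale = 1.0 / (1.0 - p);
    for ((o, &v), &k) in out.iter_mut().zip(x).zip(keep) {
        // zero out dropped units, rescale the survivors
        *o = if k { v * scale } else { 0.0 };
    }
}
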
result.cudnn_tensor_desc()?; let x_mem = read!(x, self); let r_mem = write_only!(result, self); - match cudnn_framework.dropout_forward::(config, - &x.cudnn_tensor_desc()?, - trans!(x_mem), - &r_desc, - trans_mut!(r_mem), + match cudnn_framework.dropout_forward::( + config, + &x.cudnn_tensor_desc()?, + trans!(x_mem), + &r_desc, + trans_mut!(r_mem), ) { Ok(_) => Ok(()), - Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN Dropout Forward."))), + Err(_) => Err(Error::Plugin(PluginError::Plugin( + "Unable to execute CUDA cuDNN Dropout Forward.", + ))), } } #[allow(unused_variables)] - fn dropout_grad(&self, - x: &SharedTensor, - x_diff: &SharedTensor, - result: &SharedTensor, - result_diff: &mut SharedTensor, - config: &Self::CDROP) - -> Result<(), Error> { + fn dropout_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + config: &Self::CDROP, + ) -> Result<(), Error> { // TODO what to do with the gradient? should be all zeroes since this is supposed to be a `nop` but I am not 100% sure about the nv implementations // let dr_desc = result_diff.cudnn_tensor_desc()?; // let x_mem = read!(x, self); @@ -1655,7 +1846,7 @@ impl Dropout for Backend // trans!(x_mem), // &result.cudnn_tensor_desc()?, // trans_mut!(r_mem)) { - // Ok(_) => Ok(()), + // Ok(_) => Ok(()), // Err(_) => Err(Error::Plugin(PluginError::Plugin("Unable to execute CUDA cuDNN Dropout Backward."))), // } Ok(()) diff --git a/coaster-nn/src/frameworks/native/helper.rs b/coaster-nn/src/frameworks/native/helper.rs index b8690effd..48c5a3792 100644 --- a/coaster-nn/src/frameworks/native/helper.rs +++ b/coaster-nn/src/frameworks/native/helper.rs @@ -1,11 +1,11 @@ //! Provides useful macros for easier NN implementation for native. use crate::co; -use crate::co::plugin::Error as PluginError; -use crate::co::plugin::numeric_helpers::Float; use crate::co::frameworks::native::flatbox::FlatBox; -use crate::{DirectionMode, RnnInputMode}; +use crate::co::plugin::numeric_helpers::Float; +use crate::co::plugin::Error as PluginError; use crate::RnnNetworkMode; +use crate::{DirectionMode, RnnInputMode}; #[derive(Debug, Copy, Clone)] #[allow(missing_docs)] @@ -20,7 +20,6 @@ pub struct PoolingConfig { pub stride: Vec, } - #[derive(Debug, Copy, Clone)] #[allow(missing_docs)] pub struct DropoutConfig { @@ -31,30 +30,31 @@ pub struct DropoutConfig { /// shortcut to reading a tensor as slice /// contains unwrap macro_rules! read { - ($x:ident, $t:ident, $slf:ident) => ( + ($x:ident, $t:ident, $slf:ident) => { $x.read($slf.device()).unwrap().as_slice::<$t>() - ) + }; } /// shortcut to reading a tensor as mut slice /// contains unwrap macro_rules! read_write { - ($x:ident, $t: ident, $slf:ident) => ( + ($x:ident, $t: ident, $slf:ident) => { $x.read_write($slf.device()).unwrap().as_mut_slice::<$t>() - ) + }; } /// shortcut to reading a tensor as mut slice /// contains unwrap macro_rules! write_only { - ($x:ident, $t: ident, $slf:ident) => ( + ($x:ident, $t: ident, $slf:ident) => { $x.write_only($slf.device()).unwrap().as_mut_slice::<$t>() - ) + }; } /// Just a helper function until SharedTensor has a nice interface for writing data pub fn write_to_memory(mem: &mut FlatBox, data: T) - where T::Item: Clone +where + T::Item: Clone, { let mem_buffer = mem.as_mut_slice::(); for (index, datum) in data.enumerate() { @@ -100,13 +100,18 @@ pub fn tanh_grad(x: T, dx: T) -> T { /// sigmoid impl generation macro #[macro_export] macro_rules! 
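
// The read!/read_write!/write_only! macros above are thin unwrapping wrappers
// around SharedTensor's accessors; the impl_ops_* macros below then stamp out
// one trait impl per float type. Conceptually, `impl_ops_sigmoid_for!(f32, B)`
// expands to something like (sketch):
//
//     impl Sigmoid<f32> for B {
//         fn sigmoid(&self, x: &SharedTensor<f32>, result: &mut SharedTensor<f32>)
//             -> Result<(), Error>
//         {
//             map1(read!(x, f32, self), write_only!(result, f32, self),
//                  crate::frameworks::native::helper::sigmoid)
//         }
//         // ...the grad and pointwise methods follow the same map1/map2 pattern
//     }
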
impl_ops_sigmoid_for { - ($t:ident, $b:ty) => ( + ($t:ident, $b:ty) => { impl Sigmoid<$t> for $b { - fn sigmoid(&self, x: &SharedTensor<$t>, result: &mut SharedTensor<$t>) - -> Result<(), Error> { - map1(read!(x, $t, self), - write_only!(result, $t, self), - crate::frameworks::native::helper::sigmoid) + fn sigmoid( + &self, + x: &SharedTensor<$t>, + result: &mut SharedTensor<$t>, + ) -> Result<(), Error> { + map1( + read!(x, $t, self), + write_only!(result, $t, self), + crate::frameworks::native::helper::sigmoid, + ) } fn sigmoid_grad( @@ -114,46 +119,55 @@ macro_rules! impl_ops_sigmoid_for { x: &SharedTensor<$t>, x_diff: &SharedTensor<$t>, result: &SharedTensor<$t>, - result_diff: &mut SharedTensor<$t>) - -> Result<(), Error> { - map2(read!(x, $t, self), - read!(x_diff, $t, self), - write_only!(result_diff, $t, self), - crate::frameworks::native::helper::sigmoid_grad) + result_diff: &mut SharedTensor<$t>, + ) -> Result<(), Error> { + map2( + read!(x, $t, self), + read!(x_diff, $t, self), + write_only!(result_diff, $t, self), + crate::frameworks::native::helper::sigmoid_grad, + ) } } impl SigmoidPointwise<$t> for $b { - fn sigmoid_pointwise(&self, x: &mut SharedTensor<$t>) - -> Result<(), Error> { - map1_inplace(read_write!(x, $t, self), - crate::frameworks::native::helper::sigmoid) + fn sigmoid_pointwise(&self, x: &mut SharedTensor<$t>) -> Result<(), Error> { + map1_inplace( + read_write!(x, $t, self), + crate::frameworks::native::helper::sigmoid, + ) } fn sigmoid_pointwise_grad( &self, x: &SharedTensor<$t>, - x_diff: &mut SharedTensor<$t>) - -> Result<(), $crate::co::error::Error> { - return - map2_inplace(read!(x, $t, self), - read_write!(x_diff, $t, self), - crate::frameworks::native::helper::sigmoid_grad) + x_diff: &mut SharedTensor<$t>, + ) -> Result<(), $crate::co::error::Error> { + return map2_inplace( + read!(x, $t, self), + read_write!(x_diff, $t, self), + crate::frameworks::native::helper::sigmoid_grad, + ); } } - ); + }; } /// relu impl generation macro #[macro_export] macro_rules! impl_ops_relu_for { - ($t:ident, $b:ty) => ( + ($t:ident, $b:ty) => { impl Relu<$t> for $b { - fn relu(&self, x: &SharedTensor<$t>, result: &mut SharedTensor<$t>) - -> Result<(), $crate::co::error::Error> { - map1(read!(x, $t, self), - write_only!(result, $t, self), - crate::frameworks::native::helper::relu) + fn relu( + &self, + x: &SharedTensor<$t>, + result: &mut SharedTensor<$t>, + ) -> Result<(), $crate::co::error::Error> { + map1( + read!(x, $t, self), + write_only!(result, $t, self), + crate::frameworks::native::helper::relu, + ) } fn relu_grad( @@ -161,45 +175,57 @@ macro_rules! 
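
// For reference, the helper derivatives these macros map over the slices are
// the standard identities:
//
//     sigmoid'(x) = s * (1 - s)      with s = sigmoid(x)
//     tanh'(x)    = 1 - tanh(x)^2
//     relu'(x)    = 1 if x > 0, else 0
//
// (the identities are standard; which value each helper actually receives is
// determined in native/helper.rs)
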
impl_ops_relu_for { x: &SharedTensor<$t>, x_diff: &SharedTensor<$t>, result: &SharedTensor<$t>, - result_diff: &mut SharedTensor<$t>) - -> Result<(), Error> { - map2(read!(x, $t, self), - read!(x_diff, $t, self), - write_only!(result_diff, $t, self), - crate::frameworks::native::helper::relu_grad) + result_diff: &mut SharedTensor<$t>, + ) -> Result<(), Error> { + map2( + read!(x, $t, self), + read!(x_diff, $t, self), + write_only!(result_diff, $t, self), + crate::frameworks::native::helper::relu_grad, + ) } } impl ReluPointwise<$t> for $b { - - fn relu_pointwise(&self, x: &mut SharedTensor<$t>) - -> Result<(), $crate::co::error::Error> { - map1_inplace(read_write!(x, $t, self), - crate::frameworks::native::helper::relu) + fn relu_pointwise( + &self, + x: &mut SharedTensor<$t>, + ) -> Result<(), $crate::co::error::Error> { + map1_inplace( + read_write!(x, $t, self), + crate::frameworks::native::helper::relu, + ) } fn relu_pointwise_grad( &self, x: &SharedTensor<$t>, - x_diff: &mut SharedTensor<$t>) - -> Result<(), $crate::co::error::Error> { - map2_inplace(read!(x, $t, self), - read_write!(x_diff, $t, self), - crate::frameworks::native::helper::relu_grad) + x_diff: &mut SharedTensor<$t>, + ) -> Result<(), $crate::co::error::Error> { + map2_inplace( + read!(x, $t, self), + read_write!(x_diff, $t, self), + crate::frameworks::native::helper::relu_grad, + ) } } - ); + }; } /// tanh impl generation macro #[macro_export] macro_rules! impl_ops_tanh_for { - ($t:ident, $b:ty) => ( + ($t:ident, $b:ty) => { impl $crate::plugin::Tanh<$t> for $b { - fn tanh(&self, x: &SharedTensor<$t>, result: &mut SharedTensor<$t>) - -> Result<(), $crate::co::error::Error> { - map1(read!(x, $t, self), - write_only!(result, $t, self), - crate::frameworks::native::helper::tanh) + fn tanh( + &self, + x: &SharedTensor<$t>, + result: &mut SharedTensor<$t>, + ) -> Result<(), $crate::co::error::Error> { + map1( + read!(x, $t, self), + write_only!(result, $t, self), + crate::frameworks::native::helper::tanh, + ) } fn tanh_grad( @@ -207,32 +233,40 @@ macro_rules! 
impl_ops_tanh_for { x: &SharedTensor<$t>, x_diff: &SharedTensor<$t>, result: &SharedTensor<$t>, - result_diff: &mut SharedTensor<$t>) - -> Result<(), Error> { - map2(read!(x, $t, self), - read!(x_diff, $t, self), - write_only!(result_diff, $t, self), - crate::frameworks::native::helper::tanh_grad) + result_diff: &mut SharedTensor<$t>, + ) -> Result<(), Error> { + map2( + read!(x, $t, self), + read!(x_diff, $t, self), + write_only!(result_diff, $t, self), + crate::frameworks::native::helper::tanh_grad, + ) } } impl $crate::plugin::TanhPointwise<$t> for $b { - fn tanh_pointwise(&self, x: &mut SharedTensor<$t>) - -> Result<(), $crate::co::error::Error> { - map1_inplace(read_write!(x, $t, self), - crate::frameworks::native::helper::tanh) + fn tanh_pointwise( + &self, + x: &mut SharedTensor<$t>, + ) -> Result<(), $crate::co::error::Error> { + map1_inplace( + read_write!(x, $t, self), + crate::frameworks::native::helper::tanh, + ) } fn tanh_pointwise_grad( &self, x: &SharedTensor<$t>, - x_diff: &mut SharedTensor<$t>) - -> Result<(), Error> { - map2_inplace(read!(x, $t, self), - read_write!(x_diff, $t, self), - crate::frameworks::native::helper::tanh_grad) + x_diff: &mut SharedTensor<$t>, + ) -> Result<(), Error> { + map2_inplace( + read!(x, $t, self), + read_write!(x_diff, $t, self), + crate::frameworks::native::helper::tanh_grad, + ) } } - ); + }; } #[derive(Debug, Clone)] @@ -243,7 +277,6 @@ pub struct ConvolutionConfig { pub padding: Vec, } - #[derive(Debug, Clone, Copy)] #[allow(missing_docs)] // TODO: Keep parallel with impl in Cuda @@ -267,10 +300,13 @@ pub struct RnnConfig { /// softmax impl generation macro #[macro_export] macro_rules! impl_ops_softmax_for { - ($t:ident, $b:ty) => ( + ($t:ident, $b:ty) => { impl $crate::plugin::Softmax<$t> for $b { - fn softmax(&self, x: &SharedTensor<$t>, result: &mut SharedTensor<$t>) - -> Result<(), Error> { + fn softmax( + &self, + x: &SharedTensor<$t>, + result: &mut SharedTensor<$t>, + ) -> Result<(), Error> { let xs = read!(x, $t, self); let rs = write_only!(result, $t, self); @@ -291,8 +327,8 @@ macro_rules! impl_ops_softmax_for { &self, x: &SharedTensor<$t>, x_diff: &SharedTensor<$t>, - result_diff: &mut SharedTensor<$t>) -> Result<(), Error> { - + result_diff: &mut SharedTensor<$t>, + ) -> Result<(), Error> { let xs = read!(x, $t, self); let dxs = read!(x_diff, $t, self); let drs = write_only!(result_diff, $t, self); @@ -305,23 +341,27 @@ macro_rules! impl_ops_softmax_for { map2(xs, dxs, drs, |t, dt| t * (dt - dot)) } } - ); + }; } /// log softmax impl generation macro #[macro_export] macro_rules! impl_ops_log_softmax_for { - ($t:ident, $b:ty) => ( + ($t:ident, $b:ty) => { impl $crate::plugin::LogSoftmax<$t> for $b { - fn log_softmax(&self, x: &SharedTensor<$t>, result: &mut SharedTensor<$t>) - -> Result<(), $crate::co::error::Error> { + fn log_softmax( + &self, + x: &SharedTensor<$t>, + result: &mut SharedTensor<$t>, + ) -> Result<(), $crate::co::error::Error> { let xs = read!(x, $t, self); let rs = write_only!(result, $t, self); - let max_x = xs.iter().fold(::std::$t::NEG_INFINITY, - |acc, &t| acc.max(t)); + let max_x = xs + .iter() + .fold(::std::$t::NEG_INFINITY, |acc, &t| acc.max(t)); - let mut logsum : $t = 0.0; + let mut logsum: $t = 0.0; for t in xs { logsum += (-(max_x - t)).exp(); } @@ -330,36 +370,38 @@ macro_rules! 
impl_ops_log_softmax_for { map1(xs, rs, |t| t - logsum) } - fn log_softmax_grad(&self, x: &SharedTensor<$t>, x_diff: &SharedTensor<$t>, - result_diff: &mut SharedTensor<$t>) - -> Result<(), $crate::co::error::Error> { + fn log_softmax_grad( + &self, + x: &SharedTensor<$t>, + x_diff: &SharedTensor<$t>, + result_diff: &mut SharedTensor<$t>, + ) -> Result<(), $crate::co::error::Error> { let xs = read!(x, $t, self); let dxs = read!(x_diff, $t, self); let drs = write_only!(result_diff, $t, self); - let mut sum : $t = 0.0; + let mut sum: $t = 0.0; for &grad_val in dxs.iter() { sum += grad_val; } map2(xs, dxs, drs, |t, dt| dt - t.exp() * sum) } } - ); + }; } - /// lrn impl generation macro /// TODO it's all unimplemented!() right now #[macro_export] macro_rules! impl_ops_lrn_for { - ($t:ident, $b:ty) => ( + ($t:ident, $b:ty) => { impl ::plugin::LRN<$t> for $b { fn new_lrn_config( &self, n: u32, alpha: f64, beta: f64, - k: f64 + k: f64, ) -> Result { unimplemented!(); Ok(::frameworks::native::helper::NormalizationConfig) @@ -369,7 +411,7 @@ macro_rules! impl_ops_lrn_for { &self, x: &mut SharedTensor<$t>, result: &mut SharedTensor<$t>, - config: &Self::CLRN + config: &Self::CLRN, ) -> Result<(), ::co::error::Error> { unimplemented!(); Ok(()) @@ -379,7 +421,7 @@ macro_rules! impl_ops_lrn_for { &self, x: &SharedTensor<$t>, result: &mut SharedTensor<$t>, - config: &Self::CLRN + config: &Self::CLRN, ) -> Result<(), ::co::error::Error> { unimplemented!(); Ok(()) @@ -391,7 +433,7 @@ macro_rules! impl_ops_lrn_for { x_diff: &mut SharedTensor<$t>, result: &mut SharedTensor<$t>, result_diff: &mut SharedTensor<$t>, - config: &Self::CLRN + config: &Self::CLRN, ) -> Result<(), ::co::error::Error> { unimplemented!(); Ok(()) @@ -403,11 +445,11 @@ macro_rules! impl_ops_lrn_for { x_diff: &SharedTensor<$t>, result: &SharedTensor<$t>, result_diff: &mut SharedTensor<$t>, - config: &Self::CLRN + config: &Self::CLRN, ) -> Result<(), ::co::error::Error> { unimplemented!(); Ok(()) } } - ); + }; } diff --git a/coaster-nn/src/frameworks/native/mod.rs b/coaster-nn/src/frameworks/native/mod.rs index dd9b11d58..6c3af4791 100644 --- a/coaster-nn/src/frameworks/native/mod.rs +++ b/coaster-nn/src/frameworks/native/mod.rs @@ -13,11 +13,11 @@ use rand::{Rng, SeedableRng}; #[cfg(feature = "native")] use rand_hc as hc128; -use crate::co::Error; -use crate::co::plugin::Error as PluginError; use crate::co::plugin::numeric_helpers::Bounded; use crate::co::plugin::numeric_helpers::Float; +use crate::co::plugin::Error as PluginError; use crate::co::prelude::*; +use crate::co::Error; use crate::plugin::*; #[macro_use] @@ -32,10 +32,10 @@ fn lens_eq(xs: &[T], ys: &[T]) -> Result<(), Error> { Ok(()) } - fn map1_inplace(src: &mut [T], f: F) -> Result<(), Error> - where T: Float, - F: Fn(T) -> T +where + T: Float, + F: Fn(T) -> T, { for i in 0..src.len() { src[i] = f(src[i]); @@ -44,8 +44,9 @@ fn map1_inplace(src: &mut [T], f: F) -> Result<(), Error> } fn map2_inplace(src1: &[T], src2: &mut [T], f: F) -> Result<(), Error> - where T: Float, - F: Fn(T, T) -> T +where + T: Float, + F: Fn(T, T) -> T, { lens_eq(src1, src2)?; for i in 0..src2.len() { @@ -55,8 +56,9 @@ fn map2_inplace(src1: &[T], src2: &mut [T], f: F) -> Result<(), Error> } fn map1(src: &[T], dst: &mut [T], f: F) -> Result<(), Error> - where T: Float, - F: Fn(T) -> T +where + T: Float, + F: Fn(T) -> T, { lens_eq(dst, src)?; for i in 0..dst.len() { @@ -66,8 +68,9 @@ fn map1(src: &[T], dst: &mut [T], f: F) -> Result<(), Error> } fn map2(src1: &[T], src2: &[T], dst: &mut [T], f: F) -> 
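
// The closures above implement the usual (log-)softmax passes. The forward is
// the numerically stable form
//
//     log_softmax(x_i) = x_i - (max_x + ln sum_j exp(x_j - max_x))
//
// and, with y = softmax(x) and incoming gradient dy, the backward passes are
//
//     softmax:     dx_i = y_i * (dy_i - sum_j y_j * dy_j)   // `t * (dt - dot)`
//     log-softmax: dx_i = dy_i - exp(l_i) * sum_j dy_j      // `dt - t.exp() * sum`
//
// where l = log_softmax(x), both assuming (as in the CUDA path) that `x`
// holds the forward output.
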
Result<(), Error> - where T: Float, - F: Fn(T, T) -> T +where + T: Float, + F: Fn(T, T) -> T, { lens_eq(dst, src1)?; lens_eq(dst, src2)?; @@ -77,9 +80,9 @@ fn map2(src1: &[T], src2: &[T], dst: &mut [T], f: F) -> Result<(), Error> Ok(()) } - impl NN for Backend - where T: Add + Mul + Default + Copy +where + T: Add + Mul + Default + Copy, { type CC = helper::ConvolutionConfig; type CLRN = helper::NormalizationConfig; @@ -91,104 +94,99 @@ impl NN for Backend fn init_nn() {} } -impl<'a, T> NNOperationConfig for helper::ConvolutionConfig - where T: Add + Mul + Default + Copy +impl<'a, T> NNOperationConfig for helper::ConvolutionConfig where + T: Add + Mul + Default + Copy { } -impl<'a, T> ConvolutionConfig for helper::ConvolutionConfig - where T: Add + Mul + Default + Copy +impl<'a, T> ConvolutionConfig for helper::ConvolutionConfig where + T: Add + Mul + Default + Copy { } -impl<'a, T> RnnConfig for helper::RnnConfig -where T: Add + Mul + Default + Copy +impl<'a, T> RnnConfig for helper::RnnConfig where + T: Add + Mul + Default + Copy { } -impl NNOperationConfig for helper::NormalizationConfig - where T: Add + Mul + Default + Copy +impl NNOperationConfig for helper::NormalizationConfig where + T: Add + Mul + Default + Copy { } -impl NNOperationConfig for helper::PoolingConfig - where T: Add + Mul + Default + Copy +impl NNOperationConfig for helper::PoolingConfig where + T: Add + Mul + Default + Copy { } // impl NNOperationConfig for helper::ActivationConfig // where T: Add + Mul + Default + Copy // { // } -impl NNOperationConfig for helper::DropoutConfig - where T: Add + Mul + Default + Copy +impl NNOperationConfig for helper::DropoutConfig where + T: Add + Mul + Default + Copy { } -impl NNOperationConfig for helper::RnnConfig - where T: Add + Mul + Default + Copy +impl NNOperationConfig for helper::RnnConfig where + T: Add + Mul + Default + Copy { } impl Convolution for Backend - where T: Add + Mul + Default + Copy +where + T: Add + Mul + Default + Copy, { - fn new_convolution_config(&self, - src: &SharedTensor, - dest: &SharedTensor, - filter: &SharedTensor, - algo_fwd: ConvForwardAlgo, - algo_bwd_filter: ConvBackwardFilterAlgo, - algo_bwd_data: ConvBackwardDataAlgo, - stride: &[i32], - zero_padding: &[i32]) - -> Result { + fn new_convolution_config( + &self, + src: &SharedTensor, + dest: &SharedTensor, + filter: &SharedTensor, + algo_fwd: ConvForwardAlgo, + algo_bwd_filter: ConvBackwardFilterAlgo, + algo_bwd_data: ConvBackwardDataAlgo, + stride: &[i32], + zero_padding: &[i32], + ) -> Result { // TODO: check dimensions of config match algo_fwd { - ConvForwardAlgo::Auto | - ConvForwardAlgo::ImplicitGEMM => {} + ConvForwardAlgo::Auto | ConvForwardAlgo::ImplicitGEMM => {} _ => { return Err(Error::Plugin(PluginError::Plugin("Unimplemented."))); } } match algo_bwd_filter { - ConvBackwardFilterAlgo::Auto | - ConvBackwardFilterAlgo::ImplicitGEMM => {} + ConvBackwardFilterAlgo::Auto | ConvBackwardFilterAlgo::ImplicitGEMM => {} _ => { return Err(Error::Plugin(PluginError::Plugin("Unimplemented."))); } } match algo_bwd_data { - ConvBackwardDataAlgo::Auto | - ConvBackwardDataAlgo::ImplicitGEMM => {} + ConvBackwardDataAlgo::Auto | ConvBackwardDataAlgo::ImplicitGEMM => {} _ => { return Err(Error::Plugin(PluginError::Plugin("Unimplemented."))); } } Ok(helper::ConvolutionConfig { - filter_shape: filter.desc().clone(), - stride: stride.to_vec(), - padding: zero_padding.to_vec(), - }) + filter_shape: filter.desc().clone(), + stride: stride.to_vec(), + padding: zero_padding.to_vec(), + }) } - fn 
convolution(&self, - filter: &SharedTensor, - x: &SharedTensor, - result: &mut SharedTensor, - _workspace: &mut SharedTensor, - config: &Self::CC) - -> Result<(), Error> { + fn convolution( + &self, + filter: &SharedTensor, + x: &SharedTensor, + result: &mut SharedTensor, + _workspace: &mut SharedTensor, + config: &Self::CC, + ) -> Result<(), Error> { let dev = self.device(); let input_dim = x.desc(); - let input = x.read(dev) - .unwrap() - .as_slice::(); + let input = x.read(dev).unwrap().as_slice::(); let input_stride = input_dim.default_stride(); let output_dim = result.desc().clone(); // this is ok, we only read parts we already wrote - let output = result - .write_only(dev) - .unwrap() - .as_mut_slice::(); + let output = result.write_only(dev).unwrap().as_mut_slice::(); let output_stride = output_dim.default_stride(); { @@ -198,10 +196,7 @@ impl Convolution for Backend } let filter_dim = filter.desc(); - let filter = filter - .read(dev) - .unwrap() - .as_slice::(); + let filter = filter.read(dev).unwrap().as_slice::(); let filter_stride = filter_dim.default_stride(); // sanity check @@ -212,21 +207,23 @@ impl Convolution for Backend // TODO: specializations for spatial input // recursively sum up elementwise multiplication of the hyperplanes. - fn filter_(input: &[T], - input_stride: &[usize], - input_dim: &[usize], - input_offset: usize, - input_idx_base: &[usize], - filter: &[T], - filter_stride: &[usize], - filter_dim: &[usize], - filter_offset: usize, - padding: &[i32], - depth: usize, - depth_end: usize, - acc: Option) - -> T - where T: Add + Mul + Default + Copy + fn filter_( + input: &[T], + input_stride: &[usize], + input_dim: &[usize], + input_offset: usize, + input_idx_base: &[usize], + filter: &[T], + filter_stride: &[usize], + filter_dim: &[usize], + filter_offset: usize, + padding: &[i32], + depth: usize, + depth_end: usize, + acc: Option, + ) -> T + where + T: Add + Mul + Default + Copy, { let mut acc = acc.unwrap_or_default(); @@ -243,19 +240,21 @@ impl Convolution for Backend } else if depth + 1 >= depth_end { input[i_offset] * filter[f_offset] } else { - filter_(input, - &input_stride[1..], - &input_dim[1..], - i_offset, - &input_idx_base[1..], - filter, - &filter_stride[1..], - &filter_dim[1..], - f_offset, - &padding[1..], - depth + 1, - depth_end, - None) + filter_( + input, + &input_stride[1..], + &input_dim[1..], + i_offset, + &input_idx_base[1..], + filter, + &filter_stride[1..], + &filter_dim[1..], + f_offset, + &padding[1..], + depth + 1, + depth_end, + None, + ) }; acc = acc + v; } @@ -263,24 +262,26 @@ impl Convolution for Backend } // depth == 0 is the first level - fn conv(input: &[T], - input_stride: &[usize], - input_dim: &[usize], - top_input_offset: usize, - input_offset: usize, - input_idx_base: &mut [usize], - filter: &[T], - filter_stride: &[usize], - filter_dim: &[usize], - filter_offset: usize, - depth: usize, - padding: &[i32], - stride: &[i32], - output: &mut [T], - output_stride: &[usize], - output_dim: &[usize], - output_offset: usize) - where T: Add + Mul + Default + Copy + fn conv( + input: &[T], + input_stride: &[usize], + input_dim: &[usize], + top_input_offset: usize, + input_offset: usize, + input_idx_base: &mut [usize], + filter: &[T], + filter_stride: &[usize], + filter_dim: &[usize], + filter_offset: usize, + depth: usize, + padding: &[i32], + stride: &[i32], + output: &mut [T], + output_stride: &[usize], + output_dim: &[usize], + output_offset: usize, + ) where + T: Add + Mul + Default + Copy, { let p = padding[depth] as usize; //let 
input_end = input_dim[depth] + 2 * p - (filter_dim[depth]); @@ -292,58 +293,64 @@ impl Convolution for Backend let output_offset = output_offset + output_idx * output_stride[0]; if depth + 1 < input_dim.len() { - conv(input, - input_stride, - input_dim, - top_input_offset, - input_offset, - input_idx_base, - filter, - filter_stride, - filter_dim, - filter_offset, - depth + 1, - padding, - &stride[1..], - output, - &output_stride[1..], - &output_dim[1..], - output_offset); + conv( + input, + input_stride, + input_dim, + top_input_offset, + input_offset, + input_idx_base, + filter, + filter_stride, + filter_dim, + filter_offset, + depth + 1, + padding, + &stride[1..], + output, + &output_stride[1..], + &output_dim[1..], + output_offset, + ); } else { - let v = filter_(input, - input_stride, - input_dim, - top_input_offset, - &input_idx_base[..], - filter, - filter_stride, - filter_dim, - filter_offset, - padding, - 0, - input_dim.len(), - None); + let v = filter_( + input, + input_stride, + input_dim, + top_input_offset, + &input_idx_base[..], + filter, + filter_stride, + filter_dim, + filter_offset, + padding, + 0, + input_dim.len(), + None, + ); output[output_offset] = output[output_offset] + v; } } } - fn conv_k_d1(_batch: usize, - input: &[T], - input_stride: &[usize], - input_dim: &[usize], - input_offset: usize, - input_idx_base: &mut [usize], - filter: &[T], - filter_stride: &[usize], - filter_dim: &[usize], - padding: &[i32], - stride: &[i32], - output: &mut [T], - output_stride: &[usize], - output_dim: &[usize], - output_offset: usize) - where T: Add + Mul + Default + Copy + fn conv_k_d1( + _batch: usize, + input: &[T], + input_stride: &[usize], + input_dim: &[usize], + input_offset: usize, + input_idx_base: &mut [usize], + filter: &[T], + filter_stride: &[usize], + filter_dim: &[usize], + padding: &[i32], + stride: &[i32], + output: &mut [T], + output_stride: &[usize], + output_dim: &[usize], + output_offset: usize, + ) where + T: Add + Mul + Default + Copy, { for k in 0..filter_dim[0] { let output_offset = output_offset + k * output_stride[0]; @@ -352,23 +359,25 @@ impl Convolution for Backend let input_offset = input_offset + d1 * input_stride[0]; let filter_offset = filter_offset + d1 * filter_stride[1]; - conv(input, - &input_stride[1..], - &input_dim[1..], - input_offset, - input_offset, - input_idx_base, - filter, - &filter_stride[2..], - &filter_dim[2..], - filter_offset, - 0, - padding, - stride, - output, - &output_stride[1..], - &output_dim[1..], - output_offset); + conv( + input, + &input_stride[1..], + &input_dim[1..], + input_offset, + input_offset, + input_idx_base, + filter, + &filter_stride[2..], + &filter_dim[2..], + filter_offset, + 0, + padding, + stride, + output, + &output_stride[1..], + &output_dim[1..], + output_offset, + ); } } } @@ -383,82 +392,83 @@ impl Convolution for Backend let input_offset = batch * input_stride[0]; let output_offset = batch * output_stride[0]; - conv_k_d1(batch, - input, - &input_stride[1..], - &input_dim[1..], - input_offset, - &mut input_idx[..], - filter, - &filter_stride[..], - &filter_dim[..], - &config.padding[..], - &config.stride[..], - output, - &output_stride[1..], - &output_dim[1..], - output_offset); + conv_k_d1( + batch, + input, + &input_stride[1..], + &input_dim[1..], + input_offset, + &mut input_idx[..], + filter, + &filter_stride[..], + &filter_dim[..], + &config.padding[..], + &config.stride[..], + output, + &output_stride[1..], + &output_dim[1..], + output_offset, + ); } Ok(()) } - fn convolution_grad_filter(&self, 
- src_data: &SharedTensor, - dest_diff: &SharedTensor, - filter_diff: &mut SharedTensor, - workspace: &mut SharedTensor, - config: &Self::CC) - -> Result<(), Error> { + fn convolution_grad_filter( + &self, + src_data: &SharedTensor, + dest_diff: &SharedTensor, + filter_diff: &mut SharedTensor, + workspace: &mut SharedTensor, + config: &Self::CC, + ) -> Result<(), Error> { unimplemented!() } - fn convolution_grad_data(&self, - filter: &SharedTensor, - x_diff: &SharedTensor, - result_diff: &mut SharedTensor, - workspace: &mut SharedTensor, - config: &Self::CC) - -> Result<(), Error> { + fn convolution_grad_data( + &self, + filter: &SharedTensor, + x_diff: &SharedTensor, + result_diff: &mut SharedTensor, + workspace: &mut SharedTensor, + config: &Self::CC, + ) -> Result<(), Error> { unimplemented!() } } - impl Pooling for Backend - where T: Add + Mul + Default + Copy + PartialOrd + Bounded +where + T: Add + Mul + Default + Copy + PartialOrd + Bounded, { - fn new_pooling_config(&self, - window: &[i32], - stride: &[i32], - padding: &[i32]) - -> Result { + fn new_pooling_config( + &self, + window: &[i32], + stride: &[i32], + padding: &[i32], + ) -> Result { Ok(helper::PoolingConfig { - window: window.to_vec(), - stride: stride.to_vec(), - padding: padding.to_vec(), - }) + window: window.to_vec(), + stride: stride.to_vec(), + padding: padding.to_vec(), + }) } - fn pooling_max(&self, - x: &SharedTensor, - result: &mut SharedTensor, - config: &Self::CPOOL) - -> Result<(), Error> { + fn pooling_max( + &self, + x: &SharedTensor, + result: &mut SharedTensor, + config: &Self::CPOOL, + ) -> Result<(), Error> { let dev = self.device(); let input_dim = x.desc(); // [4, 4, 4, 4] - let input = x.read(dev) - .unwrap() - .as_slice::(); + let input = x.read(dev).unwrap().as_slice::(); let input_stride = input_dim.default_stride(); // [64, 16, 4, 1]; let output_dim = result.desc().clone(); // [4,4,2,2] - // this is ok, we only read parts we already wrote - let output = result - .write_only(dev) - .unwrap() - .as_mut_slice::(); + // this is ok, we only read parts we already wrote + let output = result.write_only(dev).unwrap().as_mut_slice::(); let output_stride = output_dim.default_stride(); // [16, 4, 2, 1] { for o in output.iter_mut() { @@ -466,18 +476,20 @@ impl Pooling for Backend } } - fn max_pooling_(input: &[T], - input_stride: &[usize], - input_dim: &[usize], - input_offset: usize, - input_idx_base: &[usize], - window: &[i32], - padding: &[i32], - depth: usize, - depth_end: usize, - current_max: Option) - -> T - where T: Add + Mul + Default + Copy + PartialOrd + Bounded + fn max_pooling_( + input: &[T], + input_stride: &[usize], + input_dim: &[usize], + input_offset: usize, + input_idx_base: &[usize], + window: &[i32], + padding: &[i32], + depth: usize, + depth_end: usize, + current_max: Option, + ) -> T + where + T: Add + Mul + Default + Copy + PartialOrd + Bounded, { let mut current_max = current_max.unwrap_or(T::min_value()); @@ -494,16 +506,18 @@ impl Pooling for Backend if depth + 1 >= depth_end { input[i_mem_offset] } else { - max_pooling_(input, - &input_stride[1..], - &input_dim[1..], - i_mem_offset, - &input_idx_base[1..], - &window[1..], - &padding[1..], - depth + 1, - depth_end, - None) + max_pooling_( + input, + &input_stride[1..], + &input_dim[1..], + i_mem_offset, + &input_idx_base[1..], + &window[1..], + &padding[1..], + depth + 1, + depth_end, + None, + ) } }; // TODO: Handle NAN, inf and so on @@ -512,28 +526,30 @@ impl Pooling for Backend } else if current_max < v { v } else { - //TODO 
honour the configuration to pass on NaN or not, see cudnn API + //TODO honour the configuration to pass on NaN or not, see cudnn API panic!("NaN") }; } current_max } - fn recurse(input: &[T], - input_stride: &[usize], - input_dim: &[usize], - top_input_offset: usize, - input_offset: usize, - input_idx_base: &mut [usize], - window: &[i32], - depth: usize, - stride: &[i32], - padding: &[i32], - output: &mut [T], - output_stride: &[usize], - output_dim: &[usize], - output_offset: usize) - where T: Add + Mul + Default + Copy + PartialOrd + Bounded + fn recurse( + input: &[T], + input_stride: &[usize], + input_dim: &[usize], + top_input_offset: usize, + input_offset: usize, + input_idx_base: &mut [usize], + window: &[i32], + depth: usize, + stride: &[i32], + padding: &[i32], + output: &mut [T], + output_stride: &[usize], + output_dim: &[usize], + output_offset: usize, + ) where + T: Add + Mul + Default + Copy + PartialOrd + Bounded, { let p = padding[depth] as usize; // 0 let w = window[depth] as usize; // 2 @@ -547,37 +563,40 @@ impl Pooling for Backend //println!("input_offset {} <- output_offset {}", input_offset, output_offset); if depth + 1 < input_dim.len() { - recurse(input, - input_stride, - input_dim, - top_input_offset, - input_offset, - input_idx_base, - window, - depth + 1, - &stride[1..], - padding, - output, - &output_stride[1..], - &output_dim[1..], - output_offset); + recurse( + input, + input_stride, + input_dim, + top_input_offset, + input_offset, + input_idx_base, + window, + depth + 1, + &stride[1..], + padding, + output, + &output_stride[1..], + &output_dim[1..], + output_offset, + ); } else { - let v = max_pooling_(input, - input_stride, - input_dim, - top_input_offset, - &input_idx_base[..], - window, - padding, - 0, - input_dim.len(), - None); + let v = max_pooling_( + input, + input_stride, + input_dim, + top_input_offset, + &input_idx_base[..], + window, + padding, + 0, + input_dim.len(), + None, + ); output[output_offset] = v; } } } - let mut input_idx = Vec::new(); input_idx.resize(input_dim.len() - 2, 0); let mut output_idx = Vec::new(); @@ -597,20 +616,22 @@ impl Pooling for Backend let input_offset = input_offset + d1 * input_stride[1]; let output_offset = output_offset + d1 * output_stride[1]; // pass on the remaining dimensions (no batches, no channels, thus [2..] 
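// The recursion above generalizes max pooling to any number of spatial
// dimensions. As a minimal 1-D sketch of the same window/stride/padding
// walk (hypothetical `max_pool_1d` helper, f32 only, not part of this
// crate):
fn max_pool_1d(input: &[f32], window: usize, stride: usize, padding: usize) -> Vec<f32> {
    let out_len = (input.len() + 2 * padding - window) / stride + 1;
    (0..out_len)
        .map(|o| {
            let start = o * stride; // index into the conceptually zero-padded input
            (start..start + window)
                // positions that fall into the padding are skipped, much like
                // the padding checks in `max_pooling_` above
                .filter_map(|i| i.checked_sub(padding).and_then(|j| input.get(j)))
                .copied()
                .fold(f32::MIN, f32::max)
        })
        .collect()
}
// e.g. max_pool_1d(&[1.0, 3.0, 2.0, 4.0], 2, 2, 0) == vec![3.0, 4.0]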
- recurse(input, - &input_stride[2..], - &input_dim[2..], - input_offset, - input_offset, - &mut input_idx, - &window, - 0, - &stride, - &padding, - output, - &output_stride[2..], - &output_dim[2..], - output_offset); + recurse( + input, + &input_stride[2..], + &input_dim[2..], + input_offset, + input_offset, + &mut input_idx, + &window, + 0, + &stride, + &padding, + output, + &output_stride[2..], + &output_dim[2..], + output_offset, + ); } } @@ -620,21 +641,19 @@ impl Pooling for Backend // x, x_diff are known outputs of the forward propagation // result is the previous layer which derivate we want to know // FIXME verify - fn pooling_max_grad(&self, - x: &SharedTensor, - x_diff: &SharedTensor, - result: &SharedTensor, - result_diff: &mut SharedTensor, - config: &Self::CPOOL) - -> Result<(), Error> { - + fn pooling_max_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + config: &Self::CPOOL, + ) -> Result<(), Error> { let dev = self.device(); let input_dim = x.desc(); // [] println!("x dims {:?}", input_dim); - let input = x.read(dev) - .unwrap() - .as_slice::(); + let input = x.read(dev).unwrap().as_slice::(); let input_stride = input_dim.default_stride(); // []; let x_diff_dim = x_diff.desc(); // [] @@ -646,10 +665,7 @@ impl Pooling for Backend println!("result_diff dims {:?}", output_dim); // this is ok, we only read parts we already wrote - let output = result_diff - .write_only(dev) - .unwrap() - .as_mut_slice::(); + let output = result_diff.write_only(dev).unwrap().as_mut_slice::(); let output_stride = output_dim.default_stride(); // [] { for o in output.iter_mut() { @@ -657,21 +673,26 @@ impl Pooling for Backend } } - fn max_pooling_(input: &[T], - input_stride: &[usize], - input_dim: &[usize], - input_offset: usize, - input_idx_base: &[usize], - window: &[i32], - padding: &[i32], - depth: usize, - depth_end: usize, - current_max: Option, - current_max_index: Option) - -> (T, usize) - where T: Add + Mul + Default + Copy + PartialOrd + Bounded + fn max_pooling_( + input: &[T], + input_stride: &[usize], + input_dim: &[usize], + input_offset: usize, + input_idx_base: &[usize], + window: &[i32], + padding: &[i32], + depth: usize, + depth_end: usize, + current_max: Option, + current_max_index: Option, + ) -> (T, usize) + where + T: Add + Mul + Default + Copy + PartialOrd + Bounded, { - let mut current_max = (current_max.unwrap_or(T::min_value()), current_max_index.unwrap_or(0usize)); + let mut current_max = ( + current_max.unwrap_or(T::min_value()), + current_max_index.unwrap_or(0usize), + ); let p = padding[0] as usize; let input_idx_end = input_dim[0] + 2 * p; @@ -686,17 +707,19 @@ impl Pooling for Backend if depth + 1 >= depth_end { (input[i_mem_offset], i_mem_offset) } else { - max_pooling_(input, - &input_stride[1..], - &input_dim[1..], - i_mem_offset, - &input_idx_base[1..], - &window[1..], - &padding[1..], - depth + 1, - depth_end, - None, - None) + max_pooling_( + input, + &input_stride[1..], + &input_dim[1..], + i_mem_offset, + &input_idx_base[1..], + &window[1..], + &padding[1..], + depth + 1, + depth_end, + None, + None, + ) } }; current_max = if current_max.0 >= v { @@ -704,29 +727,31 @@ impl Pooling for Backend } else if current_max.0 < v { (v, v_index) } else { - //TODO honour the configuration to pass on NaN or not, see cudnn API + //TODO honour the configuration to pass on NaN or not, see cudnn API panic!("NaN") }; } current_max } - fn recurse(input: &[T], - input_stride: &[usize], - input_dim: 
&[usize], - top_input_offset: usize, - input_offset: usize, - input_idx_base: &mut [usize], - window: &[i32], - depth: usize, - stride: &[i32], - padding: &[i32], - output: &mut [T], - output_stride: &[usize], - output_dim: &[usize], - output_offset: usize, - dx: &[T]) - where T: Add + Mul + Default + Copy + PartialOrd + Bounded + fn recurse( + input: &[T], + input_stride: &[usize], + input_dim: &[usize], + top_input_offset: usize, + input_offset: usize, + input_idx_base: &mut [usize], + window: &[i32], + depth: usize, + stride: &[i32], + padding: &[i32], + output: &mut [T], + output_stride: &[usize], + output_dim: &[usize], + output_offset: usize, + dx: &[T], + ) where + T: Add + Mul + Default + Copy + PartialOrd + Bounded, { let p = padding[depth] as usize; // 0 let w = window[depth] as usize; // 2 @@ -740,32 +765,37 @@ impl Pooling for Backend //println!("input_offset {} <- output_offset {}", input_offset, output_offset); if depth + 1 < input_dim.len() { - recurse(input, - input_stride, - input_dim, - top_input_offset, - input_offset, - input_idx_base, - window, - depth + 1, - &stride[1..], - padding, - output, - &output_stride[1..], - &output_dim[1..], - output_offset, - dx); + recurse( + input, + input_stride, + input_dim, + top_input_offset, + input_offset, + input_idx_base, + window, + depth + 1, + &stride[1..], + padding, + output, + &output_stride[1..], + &output_dim[1..], + output_offset, + dx, + ); } else { - let (val, index) = max_pooling_(input, - input_stride, - input_dim, - top_input_offset, - &input_idx_base[..], - window, - padding, - 0, - input_dim.len(), - None, None); + let (val, index) = max_pooling_( + input, + input_stride, + input_dim, + top_input_offset, + &input_idx_base[..], + window, + padding, + 0, + input_dim.len(), + None, + None, + ); // if the stride is 1 and the size is i.e. multiple outputs of the forward propagation // can map back to one input // TODO sum up @@ -774,7 +804,6 @@ impl Pooling for Backend } } - let mut input_idx = Vec::new(); input_idx.resize(input_dim.len() - 2, 0); let mut output_idx = Vec::new(); @@ -794,48 +823,67 @@ impl Pooling for Backend let input_offset = input_offset + d1 * input_stride[1]; let output_offset = output_offset + d1 * output_stride[1]; // pass on the remaining dimensions (no batches, no channels, thus [2..] 
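// The (value, index) pair returned by the gradient-side `max_pooling_`
// above is what distinguishes the backward pass from the forward one:
// the upstream gradient is routed to the input position that won the
// forward max. A minimal 1-D sketch of that rule (hypothetical
// `max_pool_1d_grad` helper, f32 only, padding omitted; overlapping
// windows would need the summation the "TODO sum up" below refers to):
fn max_pool_1d_grad(input: &[f32], out_grad: &[f32], window: usize, stride: usize) -> Vec<f32> {
    let mut in_grad = vec![0.0f32; input.len()];
    for (o, g) in out_grad.iter().enumerate() {
        let start = o * stride;
        // argmax over the forward window; `unwrap` on `partial_cmp`
        // panics on NaN, mirroring the panic!("NaN") above
        let argmax = (start..(start + window).min(input.len()))
            .max_by(|&a, &b| input[a].partial_cmp(&input[b]).unwrap())
            .unwrap();
        in_grad[argmax] = *g;
    }
    in_grad
}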
- recurse(input, - &input_stride[2..], - &input_dim[2..], - input_offset, - input_offset, - &mut input_idx, - &window, - 0, - &stride, - &padding, - output, - &output_stride[2..], - &output_dim[2..], - output_offset, - x_diff); + recurse( + input, + &input_stride[2..], + &input_dim[2..], + input_offset, + input_offset, + &mut input_idx, + &window, + 0, + &stride, + &padding, + output, + &output_stride[2..], + &output_dim[2..], + output_offset, + x_diff, + ); } } Ok(()) } - fn pooling_avg(&self, - x: &SharedTensor, - result: &mut SharedTensor, - config: &Self::CPOOL) - -> Result<(), Error> { + fn pooling_avg( + &self, + x: &SharedTensor, + result: &mut SharedTensor, + config: &Self::CPOOL, + ) -> Result<(), Error> { return Err(Error::Plugin(PluginError::Plugin("Unimplemented."))); } - fn pooling_avg_grad(&self, - x: &SharedTensor, - x_diff: &SharedTensor, - result: &SharedTensor, - result_diff: &mut SharedTensor, - config: &Self::CPOOL) - -> Result<(), Error> { + fn pooling_avg_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + config: &Self::CPOOL, + ) -> Result<(), Error> { return Err(Error::Plugin(PluginError::Plugin("Unimplemented."))); } } impl Rnn for Backend - where T: Float + Default + Copy + PartialOrd + Bounded { - fn new_rnn_config(&self, src: &SharedTensor, dropout_probability: Option, dropout_seed: Option, sequence_length: i32, network_mode: RnnNetworkMode, input_mode: RnnInputMode, direction_mode: DirectionMode, algorithm: RnnAlgorithm, hidden_size: i32, num_layers: i32, batch_size: i32) -> Result { +where + T: Float + Default + Copy + PartialOrd + Bounded, +{ + fn new_rnn_config( + &self, + src: &SharedTensor, + dropout_probability: Option, + dropout_seed: Option, + sequence_length: i32, + network_mode: RnnNetworkMode, + input_mode: RnnInputMode, + direction_mode: DirectionMode, + algorithm: RnnAlgorithm, + hidden_size: i32, + num_layers: i32, + batch_size: i32, + ) -> Result { // TODO: Implement Config to hold parameters regarding the RNN unimplemented!() } @@ -862,26 +910,28 @@ impl Rnn for Backend unimplemented!() } - fn rnn_backward_data(&self, - src: &SharedTensor, - src_gradient: &mut SharedTensor, - output: &SharedTensor, - output_gradient: &SharedTensor, - rnn_config: &Self::CRNN, - weight: &SharedTensor, - workspace: &mut SharedTensor) - -> Result<(), Error> { + fn rnn_backward_data( + &self, + src: &SharedTensor, + src_gradient: &mut SharedTensor, + output: &SharedTensor, + output_gradient: &SharedTensor, + rnn_config: &Self::CRNN, + weight: &SharedTensor, + workspace: &mut SharedTensor, + ) -> Result<(), Error> { // TODO: Implement Backward Pass for RNN for the Input unimplemented!() } - fn rnn_backward_weights(&self, - src: &SharedTensor, - output: &SharedTensor, - filter: &mut SharedTensor, - rnn_config: &Self::CRNN, - workspace: &mut SharedTensor) - -> Result<(), Error> { + fn rnn_backward_weights( + &self, + src: &SharedTensor, + output: &SharedTensor, + filter: &mut SharedTensor, + rnn_config: &Self::CRNN, + workspace: &mut SharedTensor, + ) -> Result<(), Error> { // TODO: Implement Backward Pass with respect to Weights unimplemented!() } @@ -889,46 +939,38 @@ impl Rnn for Backend #[cfg(feature = "native")] impl Dropout for Backend - where T: Float + Add + Mul + Default + Copy + PartialOrd + Bounded +where + T: Float + Add + Mul + Default + Copy + PartialOrd + Bounded, { - fn new_dropout_config(&self, - probability: f32, - seed: u64, - ) - -> Result { - Ok(helper::DropoutConfig{probability, 
seed}) + fn new_dropout_config(&self, probability: f32, seed: u64) -> Result { + Ok(helper::DropoutConfig { probability, seed }) } // TODO this is supposed to be an in place operation #[cfg(feature = "native")] - fn dropout(&self, - x: &SharedTensor, - result: &mut SharedTensor, - config: &Self::CDROP) - -> Result<(), Error> { + fn dropout( + &self, + x: &SharedTensor, + result: &mut SharedTensor, + config: &Self::CDROP, + ) -> Result<(), Error> { let dev = self.device(); let input_dim = x.desc(); // [4, 4, 4, 4] - let input = x.read(dev) - .unwrap() - .as_slice::(); + let input = x.read(dev).unwrap().as_slice::(); let output_dim = result.desc().clone(); // [4,4,2,2] - let output = result - .write_only(dev) - .unwrap() - .as_mut_slice::(); + let output = result.write_only(dev).unwrap().as_mut_slice::(); output.clone_from_slice(input); - - let seed : [u8;8] = config.seed.to_le_bytes(); + let seed: [u8; 8] = config.seed.to_le_bytes(); let mut extrapolated_seed = [0u8; 32]; extrapolated_seed[0..8].copy_from_slice(&seed[..]); let mut rng = hc128::Hc128Rng::from_seed(extrapolated_seed); for i in 0..output.len() { - if rng.gen_range(0f32,1f32) >= config.probability { + if rng.gen_range(0f32, 1f32) >= config.probability { output[i] = input[i]; } else { output[i] = T::zero(); @@ -938,13 +980,14 @@ impl Dropout for Backend } #[allow(unused_variables)] - fn dropout_grad(&self, - x: &SharedTensor, - x_diff: &SharedTensor, - result: &SharedTensor, - result_diff: &mut SharedTensor, - config: &Self::CDROP) - -> Result<(), Error> { + fn dropout_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + config: &Self::CDROP, + ) -> Result<(), Error> { // TODO check if there is anything to do here? Ok(()) } diff --git a/coaster-nn/src/lib.rs b/coaster-nn/src/lib.rs index 353092488..bec004175 100644 --- a/coaster-nn/src/lib.rs +++ b/coaster-nn/src/lib.rs @@ -118,18 +118,18 @@ pub use crate::plugin::*; extern crate coaster as co; -#[cfg(feature = "cuda")] -extern crate rcudnn as cudnn; extern crate libc; extern crate log; +#[cfg(feature = "cuda")] +extern crate rcudnn as cudnn; -#[cfg(feature = "native")] -extern crate rand_hc; #[cfg(feature = "native")] extern crate rand; +#[cfg(feature = "native")] +extern crate rand_hc; -mod plugin; pub mod frameworks; +mod plugin; #[cfg(test)] mod tests; diff --git a/coaster-nn/src/plugin.rs b/coaster-nn/src/plugin.rs index dcc2248c1..ddb8ee2c5 100644 --- a/coaster-nn/src/plugin.rs +++ b/coaster-nn/src/plugin.rs @@ -40,7 +40,7 @@ impl ConvForwardAlgo { pub fn is_auto(&self) -> bool { match *self { ConvForwardAlgo::Auto => true, - _ => false + _ => false, } } } @@ -81,7 +81,7 @@ impl ConvBackwardFilterAlgo { pub fn is_auto(&self) -> bool { match *self { ConvBackwardFilterAlgo::Auto => true, - _ => false + _ => false, } } } @@ -126,7 +126,7 @@ impl ConvBackwardDataAlgo { pub fn is_auto(&self) -> bool { match *self { ConvBackwardDataAlgo::Auto => true, - _ => false + _ => false, } } } @@ -153,7 +153,9 @@ pub trait ConvolutionConfig { pub trait RnnConfig { /// Workspace Size - Overwritten by each plugin method except native, which doesn't require /// a workspace size. - fn workspace_size(&self) -> usize { 0 } + fn workspace_size(&self) -> usize { + 0 + } } /// Provides the functionality for a backend to support Neural Network related operations. @@ -176,25 +178,32 @@ pub trait NN { } /// Provides the functionality for a Backend to support Sigmoid operations. 
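// The native dropout implementation above widens the u64 seed into the
// 32-byte seed HC-128 expects, then draws one uniform sample per element.
// A stand-alone sketch of that masking step (assuming the rand/rand_hc
// versions this crate already depends on, where `gen_range` takes two
// arguments; zeroing when the draw falls below `probability` is
// equivalent to the `>=` keep-branch above):
use rand::{Rng, SeedableRng};
use rand_hc::Hc128Rng;

fn dropout_mask(values: &mut [f32], probability: f32, seed: u64) {
    let mut wide = [0u8; 32]; // HC-128 wants 32 bytes; only 8 are provided
    wide[..8].copy_from_slice(&seed.to_le_bytes());
    let mut rng = Hc128Rng::from_seed(wide);
    for v in values.iter_mut() {
        if rng.gen_range(0f32, 1f32) < probability {
            *v = 0.0;
        }
    }
}
// Note: kept activations are not rescaled by 1/(1 - probability) here,
// matching the implementation above.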
-pub trait Sigmoid : NN { +pub trait Sigmoid: NN { /// Computes the [Sigmoid function][sigmoid] over the input Tensor `x`. /// [sigmoid]: https://en.wikipedia.org/wiki/Sigmoid_function /// /// Saves the result to `result`. - fn sigmoid(&self, x: &SharedTensor, result: &mut SharedTensor) - -> Result<(), crate::co::error::Error>; + fn sigmoid( + &self, + x: &SharedTensor, + result: &mut SharedTensor, + ) -> Result<(), crate::co::error::Error>; /// Computes the gradient of a [Sigmoid function][sigmoid] over the input Tensor `x`. /// [sigmoid]: https://en.wikipedia.org/wiki/Sigmoid_function /// /// Saves the result to `result_diff`. - fn sigmoid_grad(&self, x: &SharedTensor, x_diff: &SharedTensor, - result: &SharedTensor, result_diff: &mut SharedTensor) - -> Result<(), crate::co::error::Error>; + fn sigmoid_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + ) -> Result<(), crate::co::error::Error>; } /// Provides the functionality for pointwise Sigmoid operations (overwrites the input with the result of the operation). -pub trait SigmoidPointwise : NN { +pub trait SigmoidPointwise: NN { /// Computes the [Sigmoid function][sigmoid] over the input Tensor `x`. /// [sigmoid]: https://en.wikipedia.org/wiki/Sigmoid_function /// @@ -207,28 +216,40 @@ pub trait SigmoidPointwise : NN { /// [sigmoid]: https://en.wikipedia.org/wiki/Sigmoid_function /// /// Saves the result back to `x_diff`. - fn sigmoid_pointwise_grad(&self, x: &SharedTensor, x_diff: &mut SharedTensor) -> Result<(), crate::co::error::Error>; + fn sigmoid_pointwise_grad( + &self, + x: &SharedTensor, + x_diff: &mut SharedTensor, + ) -> Result<(), crate::co::error::Error>; } /// Provides the functionality for a Backend to support ReLU operations. -pub trait Relu : NN { +pub trait Relu: NN { /// Computes the [Rectified linear units][relu] over the input Tensor `x`. /// [relu]: https://en.wikipedia.org/wiki/Rectifier_(neural_networks) /// /// Saves the result to `result`. - fn relu(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), crate::co::error::Error>; + fn relu( + &self, + x: &SharedTensor, + result: &mut SharedTensor, + ) -> Result<(), crate::co::error::Error>; /// Computes the gradient of [ReLU][relu] over the input Tensor `x`. /// [relu]: https://en.wikipedia.org/wiki/Rectifier_(neural_networks) /// /// Saves the result to `result_diff`. - fn relu_grad(&self, x: &SharedTensor, x_diff: &SharedTensor, - result: &SharedTensor, result_diff: &mut SharedTensor) - -> Result<(), crate::co::error::Error>; + fn relu_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + ) -> Result<(), crate::co::error::Error>; } /// Provides the functionality for pointwise ReLU operations (overwrites the input with the result of the operation). -pub trait ReluPointwise : NN { +pub trait ReluPointwise: NN { /// Computes the [Rectified linear units][relu] over the input Tensor `x`. /// [relu]: https://en.wikipedia.org/wiki/Rectifier_(neural_networks) /// @@ -239,31 +260,41 @@ pub trait ReluPointwise : NN { /// [relu]: https://en.wikipedia.org/wiki/Rectifier_(neural_networks) /// /// Saves the result back to `x_diff`. 
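// Per-element math that implementors of the `Sigmoid`/`Relu` trait
// families compute (scalar f64 sketch; the traits themselves operate on
// whole `SharedTensor`s, and note that the tests later in this patch
// pass the *forward output* back in as `x` for the grad variants):
fn sigmoid(x: f64) -> f64 {
    1.0 / (1.0 + (-x).exp())
}
// With y = sigmoid(x): dL/dx = dL/dy * y * (1 - y).
fn sigmoid_grad(y: f64, dy: f64) -> f64 {
    dy * y * (1.0 - y)
}
fn relu(x: f64) -> f64 {
    x.max(0.0)
}
// ReLU passes the gradient through only where the forward output is positive.
fn relu_grad(y: f64, dy: f64) -> f64 {
    if y > 0.0 { dy } else { 0.0 }
}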
- fn relu_pointwise_grad(&self, x: &SharedTensor, x_diff: &mut SharedTensor) - -> Result<(), crate::co::error::Error>; + fn relu_pointwise_grad( + &self, + x: &SharedTensor, + x_diff: &mut SharedTensor, + ) -> Result<(), crate::co::error::Error>; } /// Provides the functionality for a Backend to support TanH operations. -pub trait Tanh : NN { +pub trait Tanh: NN { /// Computes the [hyperbolic Tangent][tanh] over the input Tensor `x`. /// [tanh]: https://en.wikipedia.org/wiki/Hyperbolic_function /// /// Saves the result to `result`. - fn tanh(&self, x: &SharedTensor, result: &mut SharedTensor) - -> Result<(), crate::co::error::Error>; + fn tanh( + &self, + x: &SharedTensor, + result: &mut SharedTensor, + ) -> Result<(), crate::co::error::Error>; /// Computes the gradient of [hyperbolic Tangent][tanh] over the input Tensor `x`. /// [tanh]: https://en.wikipedia.org/wiki/Hyperbolic_function /// /// Saves the result to `result_diff`. - fn tanh_grad(&self, x: &SharedTensor, x_diff: &SharedTensor, - result: &SharedTensor, result_diff: &mut SharedTensor) - -> Result<(), crate::co::error::Error>; + fn tanh_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + ) -> Result<(), crate::co::error::Error>; } /// Provides the functionality for pointwise ReLU operations (overwrites the input /// with the result of the operation). -pub trait TanhPointwise : NN { +pub trait TanhPointwise: NN { /// Computes the [hyperbolic Tangent][tanh] over the input Tensor `x`. /// [tanh]: https://en.wikipedia.org/wiki/Hyperbolic_function /// @@ -274,8 +305,11 @@ pub trait TanhPointwise : NN { /// [tanh]: https://en.wikipedia.org/wiki/Hyperbolic_function /// /// Saves the result back to `x_diff`. - fn tanh_pointwise_grad(&self, x: &SharedTensor, x_diff: &mut SharedTensor) - -> Result<(), crate::co::error::Error>; + fn tanh_pointwise_grad( + &self, + x: &SharedTensor, + x_diff: &mut SharedTensor, + ) -> Result<(), crate::co::error::Error>; } /// Provide the functionality for a Backend to support RNN operations @@ -319,24 +353,26 @@ pub trait Rnn: NN { ) -> Result<(), crate::co::error::Error>; /// Calculates RNN Gradients for Input/Hidden/Cell - fn rnn_backward_data(&self, - src: &SharedTensor, - src_gradient: &mut SharedTensor, - output: &SharedTensor, - output_gradient: &SharedTensor, - rnn_config: &Self::CRNN, - weight: &SharedTensor, - workspace: &mut SharedTensor) - -> Result<(), crate::co::error::Error>; + fn rnn_backward_data( + &self, + src: &SharedTensor, + src_gradient: &mut SharedTensor, + output: &SharedTensor, + output_gradient: &SharedTensor, + rnn_config: &Self::CRNN, + weight: &SharedTensor, + workspace: &mut SharedTensor, + ) -> Result<(), crate::co::error::Error>; /// Calculates RNN Gradients for Weights - fn rnn_backward_weights(&self, - src: &SharedTensor, - output: &SharedTensor, - filter: &mut SharedTensor, - rnn_config: &Self::CRNN, - workspace: &mut SharedTensor) - -> Result<(), crate::co::error::Error>; + fn rnn_backward_weights( + &self, + src: &SharedTensor, + output: &SharedTensor, + filter: &mut SharedTensor, + rnn_config: &Self::CRNN, + workspace: &mut SharedTensor, + ) -> Result<(), crate::co::error::Error>; } #[derive(Debug, Copy, Clone)] @@ -350,7 +386,7 @@ pub enum RnnNetworkMode { /// Four-gate LSTM Network with no peephole connection LSTM, /// Three-gate network with Gated Recurrent Units - GRU + GRU, } impl std::fmt::Display for RnnNetworkMode { @@ -360,7 +396,8 @@ impl std::fmt::Display for RnnNetworkMode { 
RnnNetworkMode::Tanh => "Tanh", RnnNetworkMode::LSTM => "LSTM", RnnNetworkMode::GRU => "GRU", - }.to_owned(); + } + .to_owned(); write!(f, "{}", result) } } @@ -395,8 +432,9 @@ impl std::fmt::Display for RnnInputMode { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let result = match &self { RnnInputMode::LinearInput => "LinearInput", - RnnInputMode::SkipInput => "SkipInput" - }.to_owned(); + RnnInputMode::SkipInput => "SkipInput", + } + .to_owned(); write!(f, "{}", result) } } @@ -426,8 +464,9 @@ impl std::fmt::Display for DirectionMode { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let result = match &self { DirectionMode::UniDirectional => "UniDirectional", - DirectionMode::BiDirectional => "BiDirectional" - }.to_owned(); + DirectionMode::BiDirectional => "BiDirectional", + } + .to_owned(); write!(f, "{}", result) } } @@ -466,8 +505,9 @@ impl std::fmt::Display for RnnAlgorithm { RnnAlgorithm::Standard => "Standard", RnnAlgorithm::PersistStatic => "PersistStatic", RnnAlgorithm::PersistDynamic => "PersistDynamic", - RnnAlgorithm::Count => unreachable!() - }.to_owned(); + RnnAlgorithm::Count => unreachable!(), + } + .to_owned(); write!(f, "{}", result) } } @@ -479,7 +519,9 @@ impl RnnAlgorithm { "Standard" => Ok(RnnAlgorithm::Standard), "PersistStatic" => Ok(RnnAlgorithm::PersistStatic), "PersistDynamic" => Ok(RnnAlgorithm::PersistDynamic), - _ => Err("Unknown RnnAlgorithm used - variants are Standard, PersistStatic, PersistDynamic"), + _ => Err( + "Unknown RnnAlgorithm used - variants are Standard, PersistStatic, PersistDynamic", + ), } } } @@ -503,60 +545,64 @@ pub enum MathType { /// Uses Tensor Core ops TensorOPMath, /// Uses FP32 Tensors for input/output - TensorOPMathAllowConversion + TensorOPMathAllowConversion, } /// Provides the functionality for a Backend to support Convolution operations. -pub trait Convolution : NN { +pub trait Convolution: NN { /// Creates a new ConvolutionConfig, which needs to be passed to further /// convolution Operations. - fn new_convolution_config(&self, - src: &SharedTensor, - dest: &SharedTensor, - filter: &SharedTensor, - algo_fwd: ConvForwardAlgo, - algo_bwd_filter: ConvBackwardFilterAlgo, - algo_bwd_data: ConvBackwardDataAlgo, - stride: &[i32], - zero_padding: &[i32]) - -> Result; + fn new_convolution_config( + &self, + src: &SharedTensor, + dest: &SharedTensor, + filter: &SharedTensor, + algo_fwd: ConvForwardAlgo, + algo_bwd_filter: ConvBackwardFilterAlgo, + algo_bwd_data: ConvBackwardDataAlgo, + stride: &[i32], + zero_padding: &[i32], + ) -> Result; /// Computes a [CNN convolution][convolution] over the input Tensor `x`. /// [convolution]: https://en.wikipedia.org/wiki/Convolutional_neural_network /// /// Saves the result to `result`. - fn convolution(&self, - filter: &SharedTensor, - x: &SharedTensor, - result: &mut SharedTensor, - workspace: &mut SharedTensor, - config: &Self::CC) - -> Result<(), crate::co::error::Error>; + fn convolution( + &self, + filter: &SharedTensor, + x: &SharedTensor, + result: &mut SharedTensor, + workspace: &mut SharedTensor, + config: &Self::CC, + ) -> Result<(), crate::co::error::Error>; /// Computes the gradient of a [CNN convolution][convolution] with respect to the filter. /// [convolution]: https://en.wikipedia.org/wiki/Convolutional_neural_network /// /// Saves the result to `filter_diff`. 
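// Hypothetical end-to-end use of the `Convolution` trait above (the patch
// rendering elides the generic parameters; this sketch assumes the usual
// `Convolution<T>` / `SharedTensor<T>` forms with `T = f32` and a `u8`
// workspace, and the tensor setup is illustrative only):
fn run_conv<B: Convolution<f32>>(
    backend: &B,
    x: &SharedTensor<f32>,
    filter: &SharedTensor<f32>,
    result: &mut SharedTensor<f32>,
    workspace: &mut SharedTensor<u8>,
) -> Result<(), crate::co::error::Error> {
    let config = backend.new_convolution_config(
        x,      // src
        result, // dest
        filter,
        ConvForwardAlgo::Auto, // let the backend pick concrete algorithms
        ConvBackwardFilterAlgo::Auto,
        ConvBackwardDataAlgo::Auto,
        &[1, 1], // stride
        &[0, 0], // zero padding
    )?;
    backend.convolution(filter, x, result, workspace, &config)
}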
- fn convolution_grad_filter(&self, - src_data: &SharedTensor, - dest_diff: &SharedTensor, - filter_diff: &mut SharedTensor, - workspace: &mut SharedTensor, - config: &Self::CC) - -> Result<(), crate::co::error::Error>; + fn convolution_grad_filter( + &self, + src_data: &SharedTensor, + dest_diff: &SharedTensor, + filter_diff: &mut SharedTensor, + workspace: &mut SharedTensor, + config: &Self::CC, + ) -> Result<(), crate::co::error::Error>; /// Computes the gradient of a [CNN convolution][convolution] over the input /// Tensor `x` with respect to the data. /// [convolution]: https://en.wikipedia.org/wiki/Convolutional_neural_network /// /// Saves the result to `result_diff`. - fn convolution_grad_data(&self, - filter: &SharedTensor, - x_diff: &SharedTensor, - result_diff: &mut SharedTensor, - workspace: &mut SharedTensor, - config: &Self::CC) - -> Result<(), crate::co::error::Error>; + fn convolution_grad_data( + &self, + filter: &SharedTensor, + x_diff: &SharedTensor, + result_diff: &mut SharedTensor, + workspace: &mut SharedTensor, + config: &Self::CC, + ) -> Result<(), crate::co::error::Error>; // /// Computes the backward Convolution function w.r.t the bias. // /// @@ -586,120 +632,177 @@ pub trait Convolution : NN { } /// Provides the functionality for a Backend to support Softmax operations. -pub trait Softmax : NN { +pub trait Softmax: NN { /// Computes a [Softmax][softmax] over the input Tensor `x`. /// [softmax]: https://en.wikipedia.org/wiki/Softmax_function /// /// Saves the result to `result`. - fn softmax(&self, x: &SharedTensor, result: &mut SharedTensor) - -> Result<(), crate::co::error::Error>; + fn softmax( + &self, + x: &SharedTensor, + result: &mut SharedTensor, + ) -> Result<(), crate::co::error::Error>; /// Computes the gradient of a [Softmax][softmax] over the input Tensor `x`. /// [softmax]: https://en.wikipedia.org/wiki/Softmax_function /// /// Saves the result to `result_diff`. - fn softmax_grad(&self, x: &SharedTensor, x_diff: &SharedTensor, - result_diff: &mut SharedTensor) - -> Result<(), crate::co::error::Error>; + fn softmax_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result_diff: &mut SharedTensor, + ) -> Result<(), crate::co::error::Error>; } /// Provides the functionality for a Backend to support LogSoftmax operations. -pub trait LogSoftmax : NN { +pub trait LogSoftmax: NN { /// Computes a logarithmic softmax over the input Tensor `x`. /// /// Saves the result to `result`. - fn log_softmax(&self, x: &SharedTensor, result: &mut SharedTensor) - -> Result<(), crate::co::error::Error>; + fn log_softmax( + &self, + x: &SharedTensor, + result: &mut SharedTensor, + ) -> Result<(), crate::co::error::Error>; /// Computes the gradient of a logarithmic softmax over the input Tensor `x`. /// /// Saves the result to `result_diff`. - fn log_softmax_grad(&self, x: &SharedTensor, x_diff: &SharedTensor, - result_diff: &mut SharedTensor) - -> Result<(), crate::co::error::Error>; + fn log_softmax_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result_diff: &mut SharedTensor, + ) -> Result<(), crate::co::error::Error>; } /// Provides the functionality for a Backend to support Local Response Normalization operations. -pub trait LRN : NN { +pub trait LRN: NN { /// Creates a new (Local Response Normalization) LRNConfig, which needs to be /// passed to further LRN Operations. 
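// The n/alpha/beta/k parameters of `new_lrn_config` below feed the usual
// local response normalization:
//   b[c] = a[c] / (k + alpha/n * sum over the n-wide channel window of a[c']^2)^beta
// A scalar sketch over one channel vector (assumption: conventions differ
// on whether alpha is pre-divided by n; the Caffe-style division is shown):
fn lrn_channels(a: &[f64], n: usize, alpha: f64, beta: f64, k: f64) -> Vec<f64> {
    (0..a.len())
        .map(|c| {
            let lo = c.saturating_sub(n / 2);
            let hi = (c + n / 2 + 1).min(a.len());
            let sum_sq: f64 = a[lo..hi].iter().map(|v| v * v).sum();
            a[c] / (k + alpha / n as f64 * sum_sq).powf(beta)
        })
        .collect()
}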
- fn new_lrn_config(&self, n: u32, alpha: f64, beta: f64, k: f64) - -> Result; + fn new_lrn_config( + &self, + n: u32, + alpha: f64, + beta: f64, + k: f64, + ) -> Result; /// Computes a [LRN][lrn] over the input Tensor `x`. /// [lrn]: https://en.wikipedia.org/wiki/lrnal_neural_network /// /// Saves the result to `result`. - fn lrn(&self, x: &SharedTensor, result: &mut SharedTensor, - config: &Self::CLRN) -> Result<(), crate::co::error::Error>; + fn lrn( + &self, + x: &SharedTensor, + result: &mut SharedTensor, + config: &Self::CLRN, + ) -> Result<(), crate::co::error::Error>; /// Computes the gradient of a [LRN][lrn] over the input Tensor `x`. /// [lrn]: https://en.wikipedia.org/wiki/lrnal_neural_network /// /// Saves the result to `result_diff`. - fn lrn_grad(&self, - x: &SharedTensor, x_diff: &SharedTensor, - result: &SharedTensor, result_diff: &mut SharedTensor, - config: &Self::CLRN) - -> Result<(), crate::co::error::Error>; + fn lrn_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + config: &Self::CLRN, + ) -> Result<(), crate::co::error::Error>; } /// Provides the functionality for a Backend to support Pooling operations. -pub trait Pooling : NN { +pub trait Pooling: NN { /// Creates a new PoolingConfig, which needs to be passed to further pooling Operations. - fn new_pooling_config(&self, window: &[i32], stride: &[i32], padding: &[i32]) - -> Result; + fn new_pooling_config( + &self, + window: &[i32], + stride: &[i32], + padding: &[i32], + ) -> Result; /// Computes non-linear down-sampling ([max Pooling][pooling]) over the input Tensor `x`. /// [pooling]: https://en.wikipedia.org/wiki/Convolutional_neural_network#Pooling_layer /// /// Saves the result to `result`. - fn pooling_max(&self, x: &SharedTensor, result: &mut SharedTensor, - config: &Self::CPOOL) -> Result<(), crate::co::error::Error>; + fn pooling_max( + &self, + x: &SharedTensor, + result: &mut SharedTensor, + config: &Self::CPOOL, + ) -> Result<(), crate::co::error::Error>; /// Computes the gradient of [max Pooling][pooling] over the input Tensor `x`. /// [pooling]: https://en.wikipedia.org/wiki/Convolutional_neural_network#Pooling_layer /// /// Saves the result to `result_diff`. - fn pooling_max_grad(&self, x: &SharedTensor, x_diff: &SharedTensor, - result: &SharedTensor, result_diff: &mut SharedTensor, - config: &Self::CPOOL) -> Result<(), crate::co::error::Error>; - + fn pooling_max_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + config: &Self::CPOOL, + ) -> Result<(), crate::co::error::Error>; /// Computes non-linear down-sampling ([average Pooling][pooling]) over the input Tensor `x`. /// [pooling]: https://en.wikipedia.org/wiki/Convolutional_neural_network#Pooling_layer /// /// Saves the result to `result`. - fn pooling_avg(&self, x: &SharedTensor, result: &mut SharedTensor, - config: &Self::CPOOL) -> Result<(), crate::co::error::Error>; + fn pooling_avg( + &self, + x: &SharedTensor, + result: &mut SharedTensor, + config: &Self::CPOOL, + ) -> Result<(), crate::co::error::Error>; /// Computes the gradient of [average Pooling][pooling] over the input Tensor `x`. /// [pooling]: https://en.wikipedia.org/wiki/Convolutional_neural_network#Pooling_layer /// /// Saves the result to `result_diff`. 
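// Shape rule implied by the window/stride/padding arguments of
// `new_pooling_config` above (the standard pooling output size; a sketch,
// not a function this crate exports):
fn pooled_len(input: usize, window: usize, stride: usize, padding: usize) -> usize {
    (input + 2 * padding - window) / stride + 1
}
// e.g. pooled_len(4, 2, 2, 0) == 2, matching the [4, 4, 4, 4] ->
// [4, 4, 2, 2] shapes annotated in the native implementation earlier in
// this patch.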
- fn pooling_avg_grad(&self, x: &SharedTensor, x_diff: &SharedTensor, - result: &SharedTensor, result_diff: &mut SharedTensor, - config: &Self::CPOOL) -> Result<(), crate::co::error::Error>; + fn pooling_avg_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + config: &Self::CPOOL, + ) -> Result<(), crate::co::error::Error>; } /// Provides the functionality for a Backend to support Dropout operations. -pub trait Dropout : NN { +pub trait Dropout: NN { /// Creates a new DropoutConfig, which needs to be passed to further dropout Operations. - fn new_dropout_config(&self, dropout: f32, seed: u64) - -> Result; + fn new_dropout_config( + &self, + dropout: f32, + seed: u64, + ) -> Result; /// Computes non-linear down-sampling ([max Pooling][pooling]) over the input Tensor `x`. /// [pooling]: https://en.wikipedia.org/wiki/Convolutional_neural_network#Pooling_layer /// /// Saves the result to `result`. - fn dropout(&self, x: &SharedTensor, result: &mut SharedTensor, - config: &Self::CDROP) -> Result<(), crate::co::error::Error>; + fn dropout( + &self, + x: &SharedTensor, + result: &mut SharedTensor, + config: &Self::CDROP, + ) -> Result<(), crate::co::error::Error>; /// Computes non-linear down-sampling ([max Pooling][pooling]) over the input Tensor `x`. /// [pooling]: https://en.wikipedia.org/wiki/Dropout_(neural_networks) /// /// Saves the result to `result`. - fn dropout_grad(&self, x: &SharedTensor, x_diff: &SharedTensor, - result: &SharedTensor, result_diff: &mut SharedTensor, - config: &Self::CDROP) -> Result<(), crate::co::error::Error>; + fn dropout_grad( + &self, + x: &SharedTensor, + x_diff: &SharedTensor, + result: &SharedTensor, + result_diff: &mut SharedTensor, + config: &Self::CDROP, + ) -> Result<(), crate::co::error::Error>; } diff --git a/coaster-nn/src/tests/activation.rs b/coaster-nn/src/tests/activation.rs index 337ee2085..65e226bb1 100644 --- a/coaster-nn/src/tests/activation.rs +++ b/coaster-nn/src/tests/activation.rs @@ -4,70 +4,142 @@ // evil by itself and they'd add another level of indirection. Not nice. 
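// How the fixture constants below pair up: IN is the forward input,
// *_OUT the expected forward output for each activation, OUT_GRAD the
// upstream gradient fed to the backward pass, and *_IN_GRAD the expected
// input gradient. A scalar spot-check of the first ReLU entries (plain
// Rust, not part of the test suite):
fn main() {
    let x = 1.121623378076182407f64; // IN[0]
    assert_eq!(x.max(0.0), x); // RELU_OUT[0] == IN[0] because IN[0] > 0
    let dy = -2.332776404488865508f64; // OUT_GRAD[0]
    assert_eq!(if x > 0.0 { dy } else { 0.0 }, dy); // RELU_IN_GRAD[0] == OUT_GRAD[0]
}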
use std::fmt; -use crate::co::prelude::*; use crate::co::plugin::numeric_helpers::Float; +use crate::co::prelude::*; use crate::plugin::{Relu, ReluPointwise, Sigmoid, SigmoidPointwise, Tanh, TanhPointwise}; -use crate::tests::{Epsilon, filled_tensor, tensor_assert_eq}; - -const DIMS: [usize; 4] = [3, 1, 2, 2]; - -const IN: [f64; 12] = - [1.121623378076182407, 0.562888119501944841, -1.339156477386188037, - 0.8759488434687463583, 0.5710683496214725947, 0.1198723930562685942, - -0.3748904319909696, 0.2090742138343960133, -0.6626539528423519309, - -0.918982785419555966, 1.402159805804972244, -1.978255365302346012]; - -const OUT_GRAD: [f64; 12] = - [-2.332776404488865508, 1.70589003183088233, -1.639385156921041195, - 0.06062355027829264628, -2.98757598356132714, 2.299513994512549636, - 1.47030623613516523, 2.225557495134344654, -0.4007462184938826337, - 2.815467050105664459, 2.709297453597423977, -2.895567849550241764]; - -const RELU_OUT: [f64; 12] = - [1.121623378076182407, 0.562888119501944841, 0.0, - 0.8759488434687463583, 0.5710683496214725947, 0.1198723930562685942, - 0.0, 0.2090742138343960133, 0.0, - 0.0, 1.402159805804972244, 0.0]; - -const RELU_IN_GRAD: [f64; 12] = - [-2.332776404488865508, 1.70589003183088233, 0.0, - 0.06062355027829264628, -2.98757598356132714, 2.299513994512549636, - 0.0, 2.225557495134344654, 0.0, - 0.0, 2.709297453597423977, 0.0]; - -const SIGMOID_OUT: [f64; 12] = - [0.7542897122404972129, 0.6371205316919988361, 0.2076488097696117026, - 0.7059820190977259747, 0.6390096546850780026, 0.5299322644783190903, - 0.4073598514454171755, 0.5520789850230285882, 0.3401436900840779611, - 0.2851652041111022745, 0.802526393449478207, 0.1215049398609489357]; - -const SIGMOID_IN_GRAD: [f64; 12] = - [-0.432349179202473982, 0.3943982949828453617, -0.2697293211639109796, - 0.0125837156776099823, -0.6891630213721981575, 0.5728182710094757442, - 0.3549581010823164556, 0.5503531707184640608, -0.08994586979838948354, - 0.5739217257889142655, 0.4293634492369666151, -0.3090772250654995713]; - -const TANH_OUT: [f64; 12] = - [0.8081328403503516179, 0.5101171538136796331, -0.8714694945764568607, - 0.7043839918457185048, 0.5161434470638732663, 0.1193015097168258505, - -0.3582618973187905141, 0.2060802001687216763, -0.5801268964170599928, - -0.7254158427301376402, 0.8858176038189352612, -0.9624586626492133626]; - -const TANH_IN_GRAD: [f64; 12] = - [-0.8092898516580304142, 1.261984162584895754, -0.3943392139172716021, - 0.0305447630844965373, -2.191673618115657509, 2.266785356248217009, - 1.28159009724762284, 2.131040185040205402, -0.2658761943586825486, - 1.333889047346882493, 0.5833853608611378429, -0.2133261045550120892]; - +use crate::tests::{filled_tensor, tensor_assert_eq, Epsilon}; + +const DIMS: [usize; 4] = [3, 1, 2, 2]; + +const IN: [f64; 12] = [ + 1.121623378076182407, + 0.562888119501944841, + -1.339156477386188037, + 0.8759488434687463583, + 0.5710683496214725947, + 0.1198723930562685942, + -0.3748904319909696, + 0.2090742138343960133, + -0.6626539528423519309, + -0.918982785419555966, + 1.402159805804972244, + -1.978255365302346012, +]; + +const OUT_GRAD: [f64; 12] = [ + -2.332776404488865508, + 1.70589003183088233, + -1.639385156921041195, + 0.06062355027829264628, + -2.98757598356132714, + 2.299513994512549636, + 1.47030623613516523, + 2.225557495134344654, + -0.4007462184938826337, + 2.815467050105664459, + 2.709297453597423977, + -2.895567849550241764, +]; + +const RELU_OUT: [f64; 12] = [ + 1.121623378076182407, + 0.562888119501944841, + 0.0, + 0.8759488434687463583, + 
0.5710683496214725947, + 0.1198723930562685942, + 0.0, + 0.2090742138343960133, + 0.0, + 0.0, + 1.402159805804972244, + 0.0, +]; + +const RELU_IN_GRAD: [f64; 12] = [ + -2.332776404488865508, + 1.70589003183088233, + 0.0, + 0.06062355027829264628, + -2.98757598356132714, + 2.299513994512549636, + 0.0, + 2.225557495134344654, + 0.0, + 0.0, + 2.709297453597423977, + 0.0, +]; + +const SIGMOID_OUT: [f64; 12] = [ + 0.7542897122404972129, + 0.6371205316919988361, + 0.2076488097696117026, + 0.7059820190977259747, + 0.6390096546850780026, + 0.5299322644783190903, + 0.4073598514454171755, + 0.5520789850230285882, + 0.3401436900840779611, + 0.2851652041111022745, + 0.802526393449478207, + 0.1215049398609489357, +]; + +const SIGMOID_IN_GRAD: [f64; 12] = [ + -0.432349179202473982, + 0.3943982949828453617, + -0.2697293211639109796, + 0.0125837156776099823, + -0.6891630213721981575, + 0.5728182710094757442, + 0.3549581010823164556, + 0.5503531707184640608, + -0.08994586979838948354, + 0.5739217257889142655, + 0.4293634492369666151, + -0.3090772250654995713, +]; + +const TANH_OUT: [f64; 12] = [ + 0.8081328403503516179, + 0.5101171538136796331, + -0.8714694945764568607, + 0.7043839918457185048, + 0.5161434470638732663, + 0.1193015097168258505, + -0.3582618973187905141, + 0.2060802001687216763, + -0.5801268964170599928, + -0.7254158427301376402, + 0.8858176038189352612, + -0.9624586626492133626, +]; + +const TANH_IN_GRAD: [f64; 12] = [ + -0.8092898516580304142, + 1.261984162584895754, + -0.3943392139172716021, + 0.0305447630844965373, + -2.191673618115657509, + 2.266785356248217009, + 1.28159009724762284, + 2.131040185040205402, + -0.2658761943586825486, + 1.333889047346882493, + 0.5833853608611378429, + -0.2133261045550120892, +]; //----------------------------------------------------------- relu pub fn test_relu(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: Relu + IBackend { - - let x = filled_tensor(&backend, &DIMS, &IN); +where + T: Float + Epsilon + fmt::Debug, + Backend: Relu + IBackend, +{ + let x = filled_tensor(&backend, &DIMS, &IN); let mut r = SharedTensor::::new(&DIMS); backend.relu(&x, &mut r).unwrap(); @@ -75,12 +147,13 @@ pub fn test_relu(backend: Backend) } pub fn test_relu_grad(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: Relu + IBackend { - - let x = filled_tensor(&backend, &DIMS, &RELU_OUT); +where + T: Float + Epsilon + fmt::Debug, + Backend: Relu + IBackend, +{ + let x = filled_tensor(&backend, &DIMS, &RELU_OUT); let dx = filled_tensor(&backend, &DIMS, &OUT_GRAD); - let r = filled_tensor(&backend, &DIMS, &IN); + let r = filled_tensor(&backend, &DIMS, &IN); let mut dr = SharedTensor::new(&DIMS); backend.relu_grad(&x, &dx, &r, &mut dr).unwrap(); @@ -88,18 +161,21 @@ pub fn test_relu_grad(backend: Backend) } pub fn test_relu_pointwise(backend: Backend) - where T: Float + fmt::Debug + Epsilon, - Backend: ReluPointwise + IBackend { - +where + T: Float + fmt::Debug + Epsilon, + Backend: ReluPointwise + IBackend, +{ let mut x = filled_tensor(&backend, &DIMS, &IN); backend.relu_pointwise(&mut x).unwrap(); tensor_assert_eq(&x, &RELU_OUT, 3.0); } pub fn test_relu_pointwise_grad(backend: Backend) - where T: Float + fmt::Debug + Epsilon, - Backend: ReluPointwise + IBackend { - let x = filled_tensor(&backend, &DIMS, &RELU_OUT); +where + T: Float + fmt::Debug + Epsilon, + Backend: ReluPointwise + IBackend, +{ + let x = filled_tensor(&backend, &DIMS, &RELU_OUT); let mut dx = filled_tensor(&backend, &DIMS, &OUT_GRAD); backend.relu_pointwise_grad(&x, &mut 
dx).unwrap(); tensor_assert_eq(&dx, &RELU_IN_GRAD, 3.0); @@ -108,10 +184,11 @@ pub fn test_relu_pointwise_grad(backend: Backend) //----------------------------------------------------------- sigmoid pub fn test_sigmoid(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: Sigmoid + IBackend { - - let x = filled_tensor(&backend, &DIMS, &IN); +where + T: Float + Epsilon + fmt::Debug, + Backend: Sigmoid + IBackend, +{ + let x = filled_tensor(&backend, &DIMS, &IN); let mut r = SharedTensor::::new(&DIMS); backend.sigmoid(&x, &mut r).unwrap(); @@ -119,12 +196,13 @@ pub fn test_sigmoid(backend: Backend) } pub fn test_sigmoid_grad(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: Sigmoid + IBackend { - - let x = filled_tensor(&backend, &DIMS, &SIGMOID_OUT); +where + T: Float + Epsilon + fmt::Debug, + Backend: Sigmoid + IBackend, +{ + let x = filled_tensor(&backend, &DIMS, &SIGMOID_OUT); let dx = filled_tensor(&backend, &DIMS, &OUT_GRAD); - let r = filled_tensor(&backend, &DIMS, &IN); + let r = filled_tensor(&backend, &DIMS, &IN); let mut dr = SharedTensor::new(&DIMS); backend.sigmoid_grad(&x, &dx, &r, &mut dr).unwrap(); @@ -132,18 +210,21 @@ pub fn test_sigmoid_grad(backend: Backend) } pub fn test_sigmoid_pointwise(backend: Backend) - where T: Float + fmt::Debug + Epsilon, - Backend: SigmoidPointwise + IBackend { - +where + T: Float + fmt::Debug + Epsilon, + Backend: SigmoidPointwise + IBackend, +{ let mut x = filled_tensor(&backend, &DIMS, &IN); backend.sigmoid_pointwise(&mut x).unwrap(); tensor_assert_eq(&x, &SIGMOID_OUT, 3.0); } pub fn test_sigmoid_pointwise_grad(backend: Backend) - where T: Float + fmt::Debug + Epsilon, - Backend: SigmoidPointwise + IBackend { - let x = filled_tensor(&backend, &DIMS, &SIGMOID_OUT); +where + T: Float + fmt::Debug + Epsilon, + Backend: SigmoidPointwise + IBackend, +{ + let x = filled_tensor(&backend, &DIMS, &SIGMOID_OUT); let mut dx = filled_tensor(&backend, &DIMS, &OUT_GRAD); backend.sigmoid_pointwise_grad(&x, &mut dx).unwrap(); tensor_assert_eq(&dx, &SIGMOID_IN_GRAD, 3.0); @@ -152,10 +233,11 @@ pub fn test_sigmoid_pointwise_grad(backend: Backend) //----------------------------------------------------------- sigmoid pub fn test_tanh(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: Tanh + IBackend { - - let x = filled_tensor(&backend, &DIMS, &IN); +where + T: Float + Epsilon + fmt::Debug, + Backend: Tanh + IBackend, +{ + let x = filled_tensor(&backend, &DIMS, &IN); let mut r = SharedTensor::::new(&DIMS); backend.tanh(&x, &mut r).unwrap(); @@ -163,12 +245,13 @@ pub fn test_tanh(backend: Backend) } pub fn test_tanh_grad(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: Tanh + IBackend { - - let x = filled_tensor(&backend, &DIMS, &TANH_OUT); +where + T: Float + Epsilon + fmt::Debug, + Backend: Tanh + IBackend, +{ + let x = filled_tensor(&backend, &DIMS, &TANH_OUT); let dx = filled_tensor(&backend, &DIMS, &OUT_GRAD); - let r = filled_tensor(&backend, &DIMS, &IN); + let r = filled_tensor(&backend, &DIMS, &IN); let mut dr = SharedTensor::new(&DIMS); backend.tanh_grad(&x, &dx, &r, &mut dr).unwrap(); @@ -176,18 +259,21 @@ pub fn test_tanh_grad(backend: Backend) } pub fn test_tanh_pointwise(backend: Backend) - where T: Float + fmt::Debug + Epsilon, - Backend: TanhPointwise + IBackend { - +where + T: Float + fmt::Debug + Epsilon, + Backend: TanhPointwise + IBackend, +{ let mut x = filled_tensor(&backend, &DIMS, &IN); backend.tanh_pointwise(&mut x).unwrap(); tensor_assert_eq(&x, &TANH_OUT, 3.0); } 
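// The trailing tolerance passed to `tensor_assert_eq` (3.0 in most of
// these tests, 10.0 for the tanh gradient checks) scales the permissible
// deviation. A sketch of the assumed comparison semantics; the exact
// helper lives in the shared test module and may differ in detail:
fn assert_close(actual: &[f64], expected: &[f64], eps_mul: f64) {
    for (a, e) in actual.iter().zip(expected) {
        let tolerance = eps_mul * f64::EPSILON * e.abs().max(1.0);
        assert!((a - e).abs() <= tolerance, "{} !~ {}", a, e);
    }
}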
pub fn test_tanh_pointwise_grad(backend: Backend) - where T: Float + fmt::Debug + Epsilon, - Backend: TanhPointwise + IBackend { - let x = filled_tensor(&backend, &DIMS, &TANH_OUT); +where + T: Float + fmt::Debug + Epsilon, + Backend: TanhPointwise + IBackend, +{ + let x = filled_tensor(&backend, &DIMS, &TANH_OUT); let mut dx = filled_tensor(&backend, &DIMS, &OUT_GRAD); backend.tanh_pointwise_grad(&x, &mut dx).unwrap(); tensor_assert_eq(&dx, &TANH_IN_GRAD, 10.0); @@ -198,20 +284,33 @@ mod native { test_native!(test_relu, relu_f32, relu_f64); test_native!(test_relu_grad, relu_grad_f32, relu_grad_f64); test_native!(test_relu_pointwise, relu_pointwise_f32, relu_pointwise_f64); - test_native!(test_relu_pointwise_grad, - relu_pointwise_grad_f32, relu_pointwise_grad_f64); + test_native!( + test_relu_pointwise_grad, + relu_pointwise_grad_f32, + relu_pointwise_grad_f64 + ); test_native!(test_sigmoid, sigmoid_f32, sigmoid_f64); test_native!(test_sigmoid_grad, sigmoid_grad_f32, sigmoid_grad_f64); - test_native!(test_sigmoid_pointwise, sigmoid_pointwise_f32, sigmoid_pointwise_f64); - test_native!(test_sigmoid_pointwise_grad, - sigmoid_pointwise_grad_f32, sigmoid_pointwise_grad_f64); + test_native!( + test_sigmoid_pointwise, + sigmoid_pointwise_f32, + sigmoid_pointwise_f64 + ); + test_native!( + test_sigmoid_pointwise_grad, + sigmoid_pointwise_grad_f32, + sigmoid_pointwise_grad_f64 + ); test_native!(test_tanh, tanh_f32, tanh_f64); test_native!(test_tanh_grad, tanh_grad_f32, tanh_grad_f64); test_native!(test_tanh_pointwise, tanh_pointwise_f32, tanh_pointwise_f64); - test_native!(test_tanh_pointwise_grad, - tanh_pointwise_grad_f32, tanh_pointwise_grad_f64); + test_native!( + test_tanh_pointwise_grad, + tanh_pointwise_grad_f32, + tanh_pointwise_grad_f64 + ); } mod cuda { @@ -219,18 +318,31 @@ mod cuda { test_cuda!(test_relu, relu_f32, relu_f64); test_cuda!(test_relu_grad, relu_grad_f32, relu_grad_f64); test_cuda!(test_relu_pointwise, relu_pointwise_f32, relu_pointwise_f64); - test_cuda!(test_relu_pointwise_grad, - relu_pointwise_grad_f32, relu_pointwise_grad_f64); + test_cuda!( + test_relu_pointwise_grad, + relu_pointwise_grad_f32, + relu_pointwise_grad_f64 + ); test_cuda!(test_sigmoid, sigmoid_f32, sigmoid_f64); test_cuda!(test_sigmoid_grad, sigmoid_grad_f32, sigmoid_grad_f64); - test_cuda!(test_sigmoid_pointwise, sigmoid_pointwise_f32, sigmoid_pointwise_f64); - test_cuda!(test_sigmoid_pointwise_grad, - sigmoid_pointwise_grad_f32, sigmoid_pointwise_grad_f64); + test_cuda!( + test_sigmoid_pointwise, + sigmoid_pointwise_f32, + sigmoid_pointwise_f64 + ); + test_cuda!( + test_sigmoid_pointwise_grad, + sigmoid_pointwise_grad_f32, + sigmoid_pointwise_grad_f64 + ); test_cuda!(test_tanh, tanh_f32, tanh_f64); test_cuda!(test_tanh_grad, tanh_grad_f32, tanh_grad_f64); test_cuda!(test_tanh_pointwise, tanh_pointwise_f32, tanh_pointwise_f64); - test_cuda!(test_tanh_pointwise_grad, - tanh_pointwise_grad_f32, tanh_pointwise_grad_f64); + test_cuda!( + test_tanh_pointwise_grad, + tanh_pointwise_grad_f32, + tanh_pointwise_grad_f64 + ); } diff --git a/coaster-nn/src/tests/bench_all.rs b/coaster-nn/src/tests/bench_all.rs index a4d535db7..6f9c8da27 100644 --- a/coaster-nn/src/tests/bench_all.rs +++ b/coaster-nn/src/tests/bench_all.rs @@ -11,7 +11,8 @@ macro_rules! 
bench_activation { let x = ::tests::uniformly_random_tensor(&[$n], -2.0, 2.0); let mut r = SharedTensor::<$t>::new(&[$n]); - for _ in 0..3 { // warmup + for _ in 0..3 { + // warmup backend.$f(&x, &mut r).unwrap(); backend.synchronize().unwrap(); } @@ -26,14 +27,15 @@ macro_rules! bench_activation { pub fn $bench_grad_name(b: &mut Bencher) { let backend = ::tests::$backend_getter(); - let mut x = SharedTensor::<$t>::new(&[$n]); + let mut x = SharedTensor::<$t>::new(&[$n]); let dx = ::tests::uniformly_random_tensor(&[$n], -2.0, 2.0); - let r = ::tests::uniformly_random_tensor(&[$n], -2.0, 2.0); + let r = ::tests::uniformly_random_tensor(&[$n], -2.0, 2.0); let mut dr = SharedTensor::<$t>::new(&[$n]); backend.$f(&r, &mut x).unwrap(); - for _ in 0..3 { // warmup + for _ in 0..3 { + // warmup backend.$f_grad(&x, &dx, &r, &mut dr).unwrap(); backend.synchronize().unwrap(); } @@ -43,7 +45,7 @@ macro_rules! bench_activation { backend.synchronize().unwrap(); }); } - } + }; } // softmax differs from activations only in arg count for grad function... @@ -57,7 +59,8 @@ macro_rules! bench_softmax { let x = ::tests::uniformly_random_tensor(&[$n], -2.0, 2.0); let mut r = SharedTensor::<$t>::new(&[$n]); - for _ in 0..3 { // warmup + for _ in 0..3 { + // warmup backend.$f(&x, &mut r).unwrap(); backend.synchronize().unwrap(); } @@ -72,14 +75,15 @@ macro_rules! bench_softmax { pub fn $bench_grad_name(b: &mut Bencher) { let backend = ::tests::$backend_getter(); - let mut x = SharedTensor::<$t>::new(&[$n]); + let mut x = SharedTensor::<$t>::new(&[$n]); let dx = ::tests::uniformly_random_tensor(&[$n], -2.0, 2.0); - let r = ::tests::uniformly_random_tensor(&[$n], -2.0, 2.0); + let r = ::tests::uniformly_random_tensor(&[$n], -2.0, 2.0); let mut dr = SharedTensor::<$t>::new(&[$n]); backend.$f(&r, &mut x).unwrap(); - for _ in 0..3 { // warmup + for _ in 0..3 { + // warmup backend.$f_grad(&x, &dx, &mut dr).unwrap(); backend.synchronize().unwrap(); } @@ -89,7 +93,7 @@ macro_rules! bench_softmax { backend.synchronize().unwrap(); }); } - } + }; } macro_rules! define_benches { ($b:ident, $t:ident) => { @@ -143,11 +147,18 @@ macro_rules! 
define_benches { ($b:ident, $t:ident) => { bench_pooling!($b, $t, pooling_max, pooling_max_10k, pooling_max_grad_10k, 10_000) }} - -mod native_f32 { define_benches!(get_native_backend, f32); } -mod native_f64 { define_benches!(get_native_backend, f64); } +mod native_f32 { + define_benches!(get_native_backend, f32); +} +mod native_f64 { + define_benches!(get_native_backend, f64); +} #[cfg(feature = "cuda")] -mod cuda_f32 { define_benches!(get_cuda_backend, f32); } +mod cuda_f32 { + define_benches!(get_cuda_backend, f32); +} #[cfg(feature = "cuda")] -mod cuda_f64 { define_benches!(get_cuda_backend, f64); } +mod cuda_f64 { + define_benches!(get_cuda_backend, f64); +} diff --git a/coaster-nn/src/tests/convolutional.rs b/coaster-nn/src/tests/convolutional.rs index 323c495e8..43f2fefea 100644 --- a/coaster-nn/src/tests/convolutional.rs +++ b/coaster-nn/src/tests/convolutional.rs @@ -1,19 +1,22 @@ use std::fmt; -use crate::co::prelude::*; use crate::co::plugin::numeric_helpers::Float; +use crate::co::prelude::*; -use crate::plugin::{Convolution, LRN, ConvForwardAlgo, ConvBackwardFilterAlgo, ConvBackwardDataAlgo, ConvolutionConfig}; -use crate::tests::{Epsilon, filled_tensor, tensor_assert_eq, tensor_assert_eq_tensor}; +use crate::plugin::{ + ConvBackwardDataAlgo, ConvBackwardFilterAlgo, ConvForwardAlgo, Convolution, ConvolutionConfig, + LRN, +}; +use crate::tests::{filled_tensor, tensor_assert_eq, tensor_assert_eq_tensor, Epsilon}; pub fn test_lrn(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: LRN + IBackend { - - let x = filled_tensor(&backend,&[1, 1, 3], &[1.0, 1.0, 2.0]); +where + T: Float + Epsilon + fmt::Debug, + Backend: LRN + IBackend, +{ + let x = filled_tensor(&backend, &[1, 1, 3], &[1.0, 1.0, 2.0]); let mut r = SharedTensor::::new(&[1, 1, 3]); - let conf = LRN::::new_lrn_config(&backend, 1u32, 1e-4f64, 0.75f64, 2f64) - .unwrap(); + let conf = LRN::::new_lrn_config(&backend, 1u32, 1e-4f64, 0.75f64, 2f64).unwrap(); backend.lrn(&x, &mut r, &conf).unwrap(); @@ -22,16 +25,16 @@ pub fn test_lrn(backend: Backend) } pub fn test_lrn_grad(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: LRN + IBackend { - - let x = filled_tensor(&backend,&[1, 1, 3], &[1.0, 1.0, 2.0]); - let dx = filled_tensor(&backend,&[1, 1, 3], &[1.0, 1.0, 2.0]); - let r = filled_tensor(&backend,&[1, 1, 3], &[1.0, 1.0, 2.0]); +where + T: Float + Epsilon + fmt::Debug, + Backend: LRN + IBackend, +{ + let x = filled_tensor(&backend, &[1, 1, 3], &[1.0, 1.0, 2.0]); + let dx = filled_tensor(&backend, &[1, 1, 3], &[1.0, 1.0, 2.0]); + let r = filled_tensor(&backend, &[1, 1, 3], &[1.0, 1.0, 2.0]); let mut dr = SharedTensor::::new(&[1, 1, 3]); - let conf = LRN::::new_lrn_config(&backend, 1u32, 1e-4f64, 0.75f64, 2f64) - .unwrap(); + let conf = LRN::::new_lrn_config(&backend, 1u32, 1e-4f64, 0.75f64, 2f64).unwrap(); backend.lrn_grad(&x, &dx, &r, &mut dr, &conf).unwrap(); @@ -39,19 +42,22 @@ pub fn test_lrn_grad(backend: Backend) tensor_assert_eq(&dr, &dr_test, 3.0); } - pub fn test_convolution(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: Convolution + IBackend { - - let test = | input_dim : &[usize;4], filter_count: usize, filter_size: &[usize;2], stride: &[usize;2], padding: &[usize;2] | - { - let batch = input_dim[0]; - let width = input_dim[1]; +where + T: Float + Epsilon + fmt::Debug, + Backend: Convolution + IBackend, +{ + let test = |input_dim: &[usize; 4], + filter_count: usize, + filter_size: &[usize; 2], + stride: &[usize; 2], + padding: &[usize; 2]| { + let batch 
= input_dim[0]; + let width = input_dim[1]; let height = input_dim[2]; - let depth = input_dim[3]; + let depth = input_dim[3]; - let result_width = (width + 2 * padding[0] - filter_size[0]) / stride[0] + 1; + let result_width = (width + 2 * padding[0] - filter_size[0]) / stride[0] + 1; let result_height = (height + 2 * padding[1] - filter_size[1]) / stride[1] + 1; let f_element_count = filter_count * depth * filter_size[0] * filter_size[1]; @@ -59,16 +65,26 @@ pub fn test_convolution(backend: Backend) let x_val = vec![1.0; batch * depth * height * width]; let f_val = vec![1.0; f_element_count]; - let x = filled_tensor(&backend, &[batch, depth, height, width], &x_val); - let f = filled_tensor(&backend, &[filter_count, depth, filter_size[0], filter_size[1]], &f_val); - let mut r = SharedTensor::::new(&[batch, filter_count, result_height, result_width]); - - let conf = backend.new_convolution_config( - &x, &r, &f, - ConvForwardAlgo::Auto, - ConvBackwardFilterAlgo::Auto, - ConvBackwardDataAlgo::Auto, - &[stride[0] as i32, stride[1] as i32], &[padding[0] as i32, padding[1] as i32]).unwrap(); + let x = filled_tensor(&backend, &[batch, depth, height, width], &x_val); + let f = filled_tensor( + &backend, + &[filter_count, depth, filter_size[0], filter_size[1]], + &f_val, + ); + let mut r = SharedTensor::::new(&[batch, filter_count, result_height, result_width]); + + let conf = backend + .new_convolution_config( + &x, + &r, + &f, + ConvForwardAlgo::Auto, + ConvBackwardFilterAlgo::Auto, + ConvBackwardDataAlgo::Auto, + &[stride[0] as i32, stride[1] as i32], + &[padding[0] as i32, padding[1] as i32], + ) + .unwrap(); let mut ws = SharedTensor::::new(&[conf.workspace_size()]); @@ -79,19 +95,22 @@ pub fn test_convolution(backend: Backend) let expected_val_count = batch * filter_count * result_height * result_width; let expected_val = depth * filter_size[0] * filter_size[1]; let expected_val = expected_val as f64; - let expected_vals : Vec = vec![expected_val; expected_val_count]; - let expected : SharedTensor = filled_tensor(&backend, &[batch, filter_count, result_height, result_width], expected_vals.as_slice()); + let expected_vals: Vec = vec![expected_val; expected_val_count]; + let expected: SharedTensor = filled_tensor( + &backend, + &[batch, filter_count, result_height, result_width], + expected_vals.as_slice(), + ); tensor_assert_eq_tensor(&r, &expected, 3.0); }; // [batchsize, width, height, depth], k_filters, [filter_size_x, filter_size_y], stride, padding - test(&[4, 9, 9, 3], 6, &[3,3], &[1,1], &[0,0]); - test(&[2, 16, 16, 1], 1, &[4,4], &[1,1], &[0,0]); - test(&[2, 16, 16, 1], 1, &[2,2], &[1,1], &[0,0]); - test(&[2, 16, 16, 10], 10, &[2,2], &[1,1], &[0,0]); + test(&[4, 9, 9, 3], 6, &[3, 3], &[1, 1], &[0, 0]); + test(&[2, 16, 16, 1], 1, &[4, 4], &[1, 1], &[0, 0]); + test(&[2, 16, 16, 1], 1, &[2, 2], &[1, 1], &[0, 0]); + test(&[2, 16, 16, 10], 10, &[2, 2], &[1, 1], &[0, 0]); } - // TODO // pub fn test_convolution_grad(backend: Backend) // where T: Float + Epsilon + fmt::Debug, @@ -119,54 +138,80 @@ pub fn test_convolution(backend: Backend) // let mut dr = SharedTensor::::new(&[batch, k, h2, w2]); // } - -fn cross_test_convolution(backend_a: Backend, backend_b: Backend) - where Backend: Convolution + IBackend, - Backend: Convolution + IBackend { - +fn cross_test_convolution( + backend_a: Backend, + backend_b: Backend, +) where + Backend: Convolution + IBackend, + Backend: Convolution + IBackend, +{ // TODO add stride and padding // TODO use a slice for filtersize and k_filters let batch = 4; let 
width1 = 9; let height1 = 9; let depth1 = 3; - let padding = &[0i32,0i32]; - let stride = &[1i32,1i32]; + let padding = &[0i32, 0i32]; + let stride = &[1i32, 1i32]; let filter_size = 6; let filter_count = 3; - let result_width = (width1 - filter_size + 2 * (padding[0]) as usize) / (stride[0] as usize) + 1; - let result_height = (height1 - filter_size + 2 * (padding[1]) as usize) / (stride[1] as usize) + 1; + let result_width = + (width1 - filter_size + 2 * (padding[0]) as usize) / (stride[0] as usize) + 1; + let result_height = + (height1 - filter_size + 2 * (padding[1]) as usize) / (stride[1] as usize) + 1; let x_val = vec![1.0; batch * depth1 * height1 * width1]; let f_val = vec![1.0; filter_count * depth1 * filter_size * filter_size]; - let x = filled_tensor(&backend_a, &[batch, depth1, height1, width1], &x_val); - let f = filled_tensor(&backend_a, &[filter_count, depth1, filter_size, filter_size], &f_val); - let mut result_a = SharedTensor::::new(&[batch, filter_count, result_height, result_width]); - let mut result_b = SharedTensor::::new(&[batch, filter_count, result_height, result_width]); - - let conf_a = backend_a.new_convolution_config( - &x, &result_a, &f, - ConvForwardAlgo::Auto, - ConvBackwardFilterAlgo::Auto, - ConvBackwardDataAlgo::Auto, - stride, padding).unwrap(); + let x = filled_tensor(&backend_a, &[batch, depth1, height1, width1], &x_val); + let f = filled_tensor( + &backend_a, + &[filter_count, depth1, filter_size, filter_size], + &f_val, + ); + let mut result_a = + SharedTensor::::new(&[batch, filter_count, result_height, result_width]); + let mut result_b = + SharedTensor::::new(&[batch, filter_count, result_height, result_width]); + + let conf_a = backend_a + .new_convolution_config( + &x, + &result_a, + &f, + ConvForwardAlgo::Auto, + ConvBackwardFilterAlgo::Auto, + ConvBackwardDataAlgo::Auto, + stride, + padding, + ) + .unwrap(); let mut ws = SharedTensor::::new(&[conf_a.workspace_size()]); - backend_a.convolution(&f, &x, &mut result_a, &mut ws, &conf_a).unwrap(); + backend_a + .convolution(&f, &x, &mut result_a, &mut ws, &conf_a) + .unwrap(); - let conf_b = backend_b.new_convolution_config( - &x, &result_b, &f, - ConvForwardAlgo::Auto, - ConvBackwardFilterAlgo::Auto, - ConvBackwardDataAlgo::Auto, - stride, padding).unwrap(); + let conf_b = backend_b + .new_convolution_config( + &x, + &result_b, + &f, + ConvForwardAlgo::Auto, + ConvBackwardFilterAlgo::Auto, + ConvBackwardDataAlgo::Auto, + stride, + padding, + ) + .unwrap(); let mut ws = SharedTensor::::new(&[conf_b.workspace_size()]); - backend_b.convolution(&f, &x, &mut result_b, &mut ws, &conf_b).unwrap(); + backend_b + .convolution(&f, &x, &mut result_b, &mut ws, &conf_b) + .unwrap(); tensor_assert_eq_tensor(&result_a, &result_b, 3.0); } diff --git a/coaster-nn/src/tests/dropout.rs b/coaster-nn/src/tests/dropout.rs index 93d83660c..99f659fc8 100644 --- a/coaster-nn/src/tests/dropout.rs +++ b/coaster-nn/src/tests/dropout.rs @@ -1,43 +1,40 @@ use std::fmt; -use crate::co::prelude::*; use crate::co::plugin::numeric_helpers::Float; +use crate::co::prelude::*; use crate::plugin::Dropout; -use crate::tests::{Epsilon, filled_tensor, tensor_assert_eq_tensor, tensor_assert_ne_tensor}; +use crate::tests::{filled_tensor, tensor_assert_eq_tensor, tensor_assert_ne_tensor, Epsilon}; pub fn test_dropout(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: Dropout + IBackend { - - let test = |dims : &[usize], - probability : f32, - seed : u64, - tensor_assert_func : &dyn Fn(&SharedTensor, &SharedTensor, f64) | 
{ +where + T: Float + Epsilon + fmt::Debug, + Backend: Dropout + IBackend, +{ + let test = + |dims: &[usize], + probability: f32, + seed: u64, + tensor_assert_func: &dyn Fn(&SharedTensor, &SharedTensor, f64)| { + let conf = Dropout::::new_dropout_config(&backend, probability, seed).unwrap(); - let conf = Dropout::::new_dropout_config(&backend, probability, seed) - .unwrap(); + let inp_element_num = dims.iter().fold(1, |factorial, f| factorial * f); - let inp_element_num = dims.iter().fold(1, |factorial, f| factorial * f ); + let inp_vals: Vec = (0..inp_element_num).map(|i| (i * i) as f64).collect(); - let inp_vals : Vec = (0..inp_element_num).map(|i| (i*i) as f64).collect(); + let x = filled_tensor(&backend, dims, &inp_vals); + let mut r = SharedTensor::::new(&dims); - let x = filled_tensor(&backend, dims, &inp_vals); - let mut r = SharedTensor::::new(&dims); + backend.dropout(&x, &mut r, &conf).unwrap(); - backend.dropout(&x, - &mut r, - &conf).unwrap(); - - tensor_assert_func(&x, &r, 0.0); - }; + tensor_assert_func(&x, &r, 0.0); + }; test(&[1, 5, 5, 2], 0.999, 77777, &tensor_assert_ne_tensor); test(&[1, 1, 1, 1], 0.000, 77777, &tensor_assert_eq_tensor); test(&[5, 200, 200, 4], 0.5, 77777, &tensor_assert_ne_tensor); } - // TODO // pub fn test_dropout_grad(backend: Backend) // where T: Float + Epsilon + fmt::Debug, diff --git a/coaster-nn/src/tests/mod.rs b/coaster-nn/src/tests/mod.rs index d03a8bb0a..54b89d8f6 100644 --- a/coaster-nn/src/tests/mod.rs +++ b/coaster-nn/src/tests/mod.rs @@ -11,22 +11,25 @@ use std::fmt; use rand::{thread_rng, Rng}; -use crate::co::prelude::*; use crate::co::plugin::numeric_helpers::{cast, NumCast}; +use crate::co::prelude::*; pub trait Epsilon { fn epsilon() -> Self; } impl Epsilon for f32 { - fn epsilon() -> Self { std::f32::EPSILON } + fn epsilon() -> Self { + std::f32::EPSILON + } } impl Epsilon for f64 { - fn epsilon() -> Self { std::f64::EPSILON } + fn epsilon() -> Self { + std::f64::EPSILON + } } - #[cfg(feature = "native")] fn get_native_backend() -> Backend { Backend::::default().unwrap() @@ -38,11 +41,12 @@ fn get_opencl_backend() -> Backend { Backend::::default().unwrap() } -pub fn write_to_tensor(_backend: &Backend, xs: &mut SharedTensor, data: &[f64]) - where T: ::std::marker::Copy + NumCast, - F: IFramework, - Backend: IBackend { - +pub fn write_to_tensor(_backend: &Backend, xs: &mut SharedTensor, data: &[f64]) +where + T: ::std::marker::Copy + NumCast, + F: IFramework, + Backend: IBackend, +{ assert_eq!(xs.desc().size(), data.len()); let native = get_native_backend(); let native_dev = native.device(); @@ -53,20 +57,21 @@ pub fn write_to_tensor(_backend: &Backend, xs: &mut SharedTensor, dat mem_buffer[i] = cast::<_, T>(*x).unwrap(); } } - // not functional since, PartialEq has yet to be implemented for Device - // but tbh this is test only so screw the extra dangling ununsed memory alloc - // let other_dev = backend.device(); - // if other_dev != native_dev { - // xs.read(other_dev).unwrap(); - // xs.drop_device(native_dev).unwrap(); - // } + // not functional since, PartialEq has yet to be implemented for Device + // but tbh this is test only so screw the extra dangling ununsed memory alloc + // let other_dev = backend.device(); + // if other_dev != native_dev { + // xs.read(other_dev).unwrap(); + // xs.drop_device(native_dev).unwrap(); + // } } -pub fn filled_tensor(backend: &Backend, dims: &[usize], data: &[f64]) -> SharedTensor - where T: ::std::marker::Copy + NumCast, - F: IFramework, - Backend: IBackend { - +pub fn filled_tensor(backend: 
&Backend, dims: &[usize], data: &[f64]) -> SharedTensor +where + T: ::std::marker::Copy + NumCast, + F: IFramework, + Backend: IBackend, +{ let mut x = SharedTensor::new(&dims); write_to_tensor(backend, &mut x, data); x @@ -75,11 +80,17 @@ pub fn filled_tensor(backend: &Backend, dims: &[usize], data: &[f64]) -> // Currently unused. It was supposed to be used for random tests with inlined // verification or cross tests (Native <-> Cuda), but they aren't implemented // yet. -pub fn uniformly_random_tensor(_backend: &Backend, dims: &[usize], low: T, high: T) -> SharedTensor - where T: Copy + PartialEq + PartialOrd + rand::distributions::uniform::SampleUniform, - F: IFramework, - Backend: IBackend { - +pub fn uniformly_random_tensor( + _backend: &Backend, + dims: &[usize], + low: T, + high: T, +) -> SharedTensor +where + T: Copy + PartialEq + PartialOrd + rand::distributions::uniform::SampleUniform, + F: IFramework, + Backend: IBackend, +{ let mut xs = SharedTensor::new(&dims); { let native = get_native_backend(); @@ -93,13 +104,13 @@ pub fn uniformly_random_tensor(_backend: &Backend, dims: &[usize], low: *x = Rng::gen_range(&mut rng, low, high); } } - // not functional since, PartialEq has yet to be implemented for Device - // but tbh this is test only so screw the extra dangling ununsed memory alloc - // let other_dev = backend.device(); - // if other_dev != native_dev { - // xs.read(other_dev).unwrap(); - // xs.drop_device(native_dev).unwrap(); - // } + // not functional since, PartialEq has yet to be implemented for Device + // but tbh this is test only so screw the extra dangling ununsed memory alloc + // let other_dev = backend.device(); + // if other_dev != native_dev { + // xs.read(other_dev).unwrap(); + // xs.drop_device(native_dev).unwrap(); + // } } xs } @@ -109,8 +120,9 @@ pub fn uniformly_random_tensor(_backend: &Backend, dims: &[usize], low: /// Of course if there were inevitable substantial rounding errors during /// calculations of `xs` there may be false positives. 
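The assertion helpers below compare values with a symmetric relative tolerance rather than an absolute one: two values pass when their difference stays within `e * (|a| + |b|) / 2`, where `e` is the element type's machine epsilon scaled by the per-call `epsilon_mul`. A minimal standalone sketch of the same check (the function name is illustrative, not part of this patch):

fn approx_eq(a: f64, b: f64, e: f64) -> bool {
    // Equal when the difference is within `e` times the mean magnitude;
    // `e` corresponds to T::epsilon() * epsilon_mul in the helpers below.
    (a - b).abs() <= e * (a.abs() + b.abs()) * 0.5
}

For instance, with the f32 epsilon (about 1.19e-7) and epsilon_mul = 10.0, two values near 1.0 may differ by up to roughly 1.2e-6 before the assertion fails.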
pub fn tensor_assert_eq(xs: &SharedTensor, data: &[f64], epsilon_mul: f64) - where T: Copy + fmt::Debug + PartialEq + NumCast + Epsilon { - +where + T: Copy + fmt::Debug + PartialEq + NumCast + Epsilon, +{ let e = cast::<_, f64>(T::epsilon()).unwrap() * epsilon_mul; let native = get_native_backend(); @@ -125,17 +137,24 @@ pub fn tensor_assert_eq(xs: &SharedTensor, data: &[f64], epsilon_mul: f64) let diff = (x1_t - x2).abs(); let max_diff = e * (x1_t.abs() + x2.abs()) * 0.5; if (x1_t - x2).abs() > e * (x1_t.abs() + x2.abs()) * 0.5 { - println!("Results differ: {:?} != {:?} ({:.2?} in {:?} and {:?}", - x1_t, x2, diff / max_diff, mem_slice, data); + println!( + "Results differ: {:?} != {:?} ({:.2?} in {:?} and {:?}", + x1_t, + x2, + diff / max_diff, + mem_slice, + data + ); assert!(false); } } } -pub fn tensor_assert_eq_tensor(xa: &SharedTensor, xb: &SharedTensor, epsilon_mul: f64) - where T: Copy + fmt::Debug + PartialEq + NumCast + Epsilon, - U: Copy + fmt::Debug + PartialEq + NumCast + Epsilon { - +pub fn tensor_assert_eq_tensor(xa: &SharedTensor, xb: &SharedTensor, epsilon_mul: f64) +where + T: Copy + fmt::Debug + PartialEq + NumCast + Epsilon, + U: Copy + fmt::Debug + PartialEq + NumCast + Epsilon, +{ let e = cast::<_, f64>(T::epsilon()).unwrap() * epsilon_mul; let native = get_native_backend(); @@ -154,17 +173,24 @@ pub fn tensor_assert_eq_tensor(xa: &SharedTensor, xb: &SharedTensor, let diff = (x1_t - x2_t).abs(); let max_diff = e * (x1_t.abs() + x2_t.abs()) * 0.5; if (x1_t - x2_t).abs() > e * (x1_t.abs() + x2_t.abs()) * 0.5 { - println!("Results differ: {:?} != {:?} ({:.2?} in {:?} and {:?}", - x1_t, x2_t, diff / max_diff, mem_slice_a, mem_slice_b); + println!( + "Results differ: {:?} != {:?} ({:.2?} in {:?} and {:?}", + x1_t, + x2_t, + diff / max_diff, + mem_slice_a, + mem_slice_b + ); assert!(false); } } } -pub fn tensor_assert_ne_tensor(xa: &SharedTensor, xb: &SharedTensor, epsilon_mul: f64) - where T: Copy + fmt::Debug + PartialEq + NumCast + Epsilon, - U: Copy + fmt::Debug + PartialEq + NumCast + Epsilon { - +pub fn tensor_assert_ne_tensor(xa: &SharedTensor, xb: &SharedTensor, epsilon_mul: f64) +where + T: Copy + fmt::Debug + PartialEq + NumCast + Epsilon, + U: Copy + fmt::Debug + PartialEq + NumCast + Epsilon, +{ let e = cast::<_, f64>(T::epsilon()).unwrap() * epsilon_mul; let native = get_native_backend(); @@ -184,7 +210,10 @@ pub fn tensor_assert_ne_tensor(xa: &SharedTensor, xb: &SharedTensor, return; } } - println!("Results are too similar {:?} ~= {:?}", mem_slice_a, mem_slice_b); + println!( + "Results are too similar {:?} ~= {:?}", + mem_slice_a, mem_slice_b + ); assert!(false); } @@ -194,7 +223,6 @@ pub fn tensor_assert_ne_tensor(xa: &SharedTensor, xb: &SharedTensor, // invocations could be made much less verbose. macro_rules! test_cuda { ($test_name:ident, $f32_name:ident, $f64_name:ident) => { - #[cfg(feature = "cuda")] #[test] fn $f32_name() { @@ -206,12 +234,11 @@ macro_rules! test_cuda { fn $f64_name() { $test_name::(crate::tests::get_cuda_backend()) } - } + }; } macro_rules! test_native { ($test_name:ident, $f32_name:ident, $f64_name:ident) => { - #[cfg(feature = "native")] #[test] fn $f32_name() { @@ -223,22 +250,25 @@ macro_rules! test_native { fn $f64_name() { $test_name::(crate::tests::get_native_backend()) } - } + }; } macro_rules! 
test_cross { ($test_name:ident, $f32_name:ident) => { - #[cfg(all(feature = "native",feature = "cuda"))] + #[cfg(all(feature = "native", feature = "cuda"))] #[test] fn $f32_name() { - $test_name::<_, _>(crate::tests::get_native_backend(), crate::tests::get_cuda_backend()) + $test_name::<_, _>( + crate::tests::get_native_backend(), + crate::tests::get_cuda_backend(), + ) } - } + }; } mod activation; +mod bench_all; mod convolutional; -mod softmax; -mod pooling; mod dropout; -mod bench_all; +mod pooling; +mod softmax; diff --git a/coaster-nn/src/tests/pooling.rs b/coaster-nn/src/tests/pooling.rs index 467d50a0a..5f014e46f 100644 --- a/coaster-nn/src/tests/pooling.rs +++ b/coaster-nn/src/tests/pooling.rs @@ -1,26 +1,31 @@ use std::fmt; -use crate::co::prelude::*; use crate::co::plugin::numeric_helpers::Float; +use crate::co::prelude::*; use crate::plugin::Pooling; -use crate::tests::{Epsilon, filled_tensor, tensor_assert_eq, tensor_assert_eq_tensor, uniformly_random_tensor}; - +use crate::tests::{ + filled_tensor, tensor_assert_eq, tensor_assert_eq_tensor, uniformly_random_tensor, Epsilon, +}; pub fn test_pooling_max(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: Pooling + IBackend { - - let test = |inp_dims: &[usize], out_dims: &[usize], window: &[i32], stride: &[i32], padding: &[i32] | { +where + T: Float + Epsilon + fmt::Debug, + Backend: Pooling + IBackend, +{ + let test = |inp_dims: &[usize], + out_dims: &[usize], + window: &[i32], + stride: &[i32], + padding: &[i32]| { let inp_size = (0..inp_dims.len()).fold(1, |mpy, x| mpy * inp_dims[x]); let out_size = (0..out_dims.len()).fold(1, |mpy, x| mpy * out_dims[x]); let mut inp = vec![1.0; inp_size]; inp[0] = 2.0; - let x = filled_tensor(&backend, inp_dims, &inp); + let x = filled_tensor(&backend, inp_dims, &inp); let mut r = SharedTensor::::new(&out_dims); - let conf = Pooling::::new_pooling_config(&backend, window, stride, padding) - .unwrap(); + let conf = Pooling::::new_pooling_config(&backend, window, stride, padding).unwrap(); backend.pooling_max(&x, &mut r, &conf).unwrap(); @@ -30,48 +35,53 @@ pub fn test_pooling_max(backend: Backend) }; // input dims , output dims , window, stride, padding - test(&[1, 1, 3, 3], &[1, 1, 2, 2], &[2, 2], &[1,1], &[0,0]); - test(&[1, 1, 10, 10], &[1, 1, 2, 2], &[9, 9], &[1,1], &[0,0]); - test(&[1, 1, 49, 49], &[1, 1, 7, 7], &[7, 7], &[7,7], &[0,0]); - test(&[1, 1, 4, 4], &[1, 1, 2, 2], &[2, 2], &[2,2], &[0,0]); - test(&[4, 1, 4, 4], &[4, 1, 2, 2], &[2, 2], &[2,2], &[0,0]); - test(&[1, 4, 4, 4], &[1, 4, 2, 2], &[2, 2], &[2,2], &[0,0]); - test(&[4, 4, 4, 4], &[4, 4, 3, 3], &[2, 2], &[2,2], &[1,1]); + test(&[1, 1, 3, 3], &[1, 1, 2, 2], &[2, 2], &[1, 1], &[0, 0]); + test(&[1, 1, 10, 10], &[1, 1, 2, 2], &[9, 9], &[1, 1], &[0, 0]); + test(&[1, 1, 49, 49], &[1, 1, 7, 7], &[7, 7], &[7, 7], &[0, 0]); + test(&[1, 1, 4, 4], &[1, 1, 2, 2], &[2, 2], &[2, 2], &[0, 0]); + test(&[4, 1, 4, 4], &[4, 1, 2, 2], &[2, 2], &[2, 2], &[0, 0]); + test(&[1, 4, 4, 4], &[1, 4, 2, 2], &[2, 2], &[2, 2], &[0, 0]); + test(&[4, 4, 4, 4], &[4, 4, 3, 3], &[2, 2], &[2, 2], &[1, 1]); } pub fn test_pooling_max_grad(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: Pooling + IBackend { +where + T: Float + Epsilon + fmt::Debug, + Backend: Pooling + IBackend, +{ let mut inp = vec![1.0; 256]; inp[0] = 2.0; - let x = filled_tensor(&backend,&[4, 4, 4, 4], &inp); - let dx = filled_tensor(&backend,&[4, 4, 4, 4], &inp); - let r = filled_tensor(&backend,&[4, 4, 2, 2], &inp[0..64]); + let x = 
filled_tensor(&backend, &[4, 4, 4, 4], &inp); + let dx = filled_tensor(&backend, &[4, 4, 4, 4], &inp); + let r = filled_tensor(&backend, &[4, 4, 2, 2], &inp[0..64]); let mut dr = SharedTensor::::new(&[4, 4, 2, 2]); - let conf = Pooling::::new_pooling_config(&backend, &[2, 2], &[2, 2], &[0, 0]) - .unwrap(); + let conf = Pooling::::new_pooling_config(&backend, &[2, 2], &[2, 2], &[0, 0]).unwrap(); - backend.pooling_max_grad(&x, &dx, &r, &mut dr, &conf).unwrap(); + backend + .pooling_max_grad(&x, &dx, &r, &mut dr, &conf) + .unwrap(); let dr_test = [ - 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, - 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, - 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, - 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]; + 2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, + 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, + ]; tensor_assert_eq(&dr, &dr_test, 3.0); } pub fn test_pooling_avg(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: Pooling + IBackend { +where + T: Float + Epsilon + fmt::Debug, + Backend: Pooling + IBackend, +{ let mut inp = vec![1.0; 256]; inp[0] = 5.0; - let x = filled_tensor(&backend, &[4, 4, 4, 4], &inp); + let x = filled_tensor(&backend, &[4, 4, 4, 4], &inp); let mut r = SharedTensor::::new(&[4, 4, 2, 2]); - let conf = Pooling::::new_pooling_config(&backend, &[2, 2], &[2, 2], &[0, 0]) - .unwrap(); + let conf = Pooling::::new_pooling_config(&backend, &[2, 2], &[2, 2], &[0, 0]).unwrap(); backend.pooling_avg(&x, &mut r, &conf).unwrap(); @@ -80,21 +90,23 @@ pub fn test_pooling_avg(backend: Backend) tensor_assert_eq(&r, &r_test, 3.0); } - pub fn test_pooling_avg_grad(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: Pooling + IBackend { +where + T: Float + Epsilon + fmt::Debug, + Backend: Pooling + IBackend, +{ let mut inp = vec![1.0; 512]; inp[0] = 2.0; - let x = filled_tensor(&backend, &[8, 4, 4, 4], &inp); + let x = filled_tensor(&backend, &[8, 4, 4, 4], &inp); let dx = filled_tensor(&backend, &[8, 4, 4, 4], &inp); - let r = filled_tensor(&backend, &[8, 4, 2, 2], &inp[0..128]); + let r = filled_tensor(&backend, &[8, 4, 2, 2], &inp[0..128]); let mut dr = SharedTensor::::new(&[8, 4, 2, 2]); - let conf = Pooling::::new_pooling_config(&backend, &[2, 2], &[2, 2], &[0, 0]) - .unwrap(); + let conf = Pooling::::new_pooling_config(&backend, &[2, 2], &[2, 2], &[0, 0]).unwrap(); - backend.pooling_avg_grad(&x, &dx, &r, &mut dr, &conf).unwrap(); + backend + .pooling_avg_grad(&x, &dx, &r, &mut dr, &conf) + .unwrap(); let mut dr_test = vec![0.25; 128]; dr_test[0] = 0.5; @@ -104,11 +116,13 @@ pub fn test_pooling_avg_grad(backend: Backend) tensor_assert_eq(&dr, &dr_test, 1.0); } -pub fn cross_test_pooling_max(backend_a: Backend, backend_b: Backend) - where - Backend: Pooling + IBackend, - Backend: Pooling + IBackend { - +pub fn cross_test_pooling_max( + backend_a: Backend, + backend_b: Backend, +) where + Backend: Pooling + IBackend, + Backend: Pooling + IBackend, +{ let mut inp = vec![1.0; 192]; inp[0] = 2.0; @@ -119,17 +133,15 @@ pub fn cross_test_pooling_max(backend_a: Backend::new(dim_out); let mut r_b = SharedTensor::::new(dim_out); - let conf_a = 
Pooling::::new_pooling_config(&backend_a, window, stride, padding) - .unwrap(); - let conf_b = Pooling::::new_pooling_config(&backend_b, window, stride, padding) - .unwrap(); + let conf_a = Pooling::::new_pooling_config(&backend_a, window, stride, padding).unwrap(); + let conf_b = Pooling::::new_pooling_config(&backend_b, window, stride, padding).unwrap(); backend_a.pooling_max(&x, &mut r_a, &conf_a).unwrap(); backend_b.pooling_max(&x, &mut r_b, &conf_b).unwrap(); @@ -137,40 +149,44 @@ pub fn cross_test_pooling_max(backend_a: Backend(backend_a: Backend, backend_b: Backend) - where - Backend: Pooling + IBackend, - Backend: Pooling + IBackend { - +pub fn cross_test_pooling_max_grad( + backend_a: Backend, + backend_b: Backend, +) where + Backend: Pooling + IBackend, + Backend: Pooling + IBackend, +{ let mut inp = vec![1.0; 256]; inp[0] = 2.0; let batchsize = 1; let channels = 1; - let input_dims = &[batchsize,channels,2,2]; - let window = &[2,2]; - let stride = &[2,2]; - let padding = &[0,0]; + let input_dims = &[batchsize, channels, 2, 2]; + let window = &[2, 2]; + let stride = &[2, 2]; + let padding = &[0, 0]; // TODO calculate dynamically - let output_dims = &[batchsize,channels,1,1]; + let output_dims = &[batchsize, channels, 1, 1]; - let n_in = input_dims.iter().fold(1,|a, &b| a * b); - let n_out = output_dims.iter().fold(1,|a, &b| a * b); + let n_in = input_dims.iter().fold(1, |a, &b| a * b); + let n_out = output_dims.iter().fold(1, |a, &b| a * b); - let x = filled_tensor(&backend_a, input_dims, &inp[0..n_in]); + let x = filled_tensor(&backend_a, input_dims, &inp[0..n_in]); let dx = filled_tensor(&backend_a, input_dims, &inp[0..n_in]); - let r = filled_tensor(&backend_a, output_dims, &inp[0..n_out]); + let r = filled_tensor(&backend_a, output_dims, &inp[0..n_out]); let mut dr_a = SharedTensor::::new(output_dims); let mut dr_b = SharedTensor::::new(output_dims); - let conf_a = Pooling::::new_pooling_config(&backend_a, window, stride, padding) + let conf_a = Pooling::::new_pooling_config(&backend_a, window, stride, padding).unwrap(); + let conf_b = Pooling::::new_pooling_config(&backend_b, window, stride, padding).unwrap(); + + backend_a + .pooling_max_grad(&x, &dx, &r, &mut dr_a, &conf_a) .unwrap(); - let conf_b = Pooling::::new_pooling_config(&backend_b, window, stride, padding) + backend_b + .pooling_max_grad(&x, &dx, &r, &mut dr_b, &conf_b) .unwrap(); - backend_a.pooling_max_grad(&x, &dx, &r, &mut dr_a, &conf_a).unwrap(); - backend_b.pooling_max_grad(&x, &dx, &r, &mut dr_b, &conf_b).unwrap(); - tensor_assert_eq_tensor(&dr_a, &dr_b, 3.0); } @@ -183,9 +199,17 @@ mod cross { mod cuda { use super::*; test_cuda!(test_pooling_avg, pooling_avg_f32, pooling_avg_f64); - test_cuda!(test_pooling_avg_grad, pooling_avg_grad_f32, pooling_avg_grad_f64); + test_cuda!( + test_pooling_avg_grad, + pooling_avg_grad_f32, + pooling_avg_grad_f64 + ); test_cuda!(test_pooling_max, pooling_max_f32, pooling_max_f64); - test_cuda!(test_pooling_max_grad, pooling_max_grad_f32, pooling_max_grad_f64); + test_cuda!( + test_pooling_max_grad, + pooling_max_grad_f32, + pooling_max_grad_f64 + ); } mod native { diff --git a/coaster-nn/src/tests/softmax.rs b/coaster-nn/src/tests/softmax.rs index 80581ce95..7739b8d92 100644 --- a/coaster-nn/src/tests/softmax.rs +++ b/coaster-nn/src/tests/softmax.rs @@ -1,55 +1,109 @@ use std::fmt; -use crate::co::prelude::*; use crate::co::plugin::numeric_helpers::Float; +use crate::co::prelude::*; -use crate::plugin::{Softmax, LogSoftmax}; -use crate::tests::{Epsilon, filled_tensor, 
tensor_assert_eq, tensor_assert_eq_tensor}; +use crate::plugin::{LogSoftmax, Softmax}; +use crate::tests::{filled_tensor, tensor_assert_eq, tensor_assert_eq_tensor, Epsilon}; const DIMS: [usize; 3] = [4, 1, 3]; -const IN: [f64; 12] = - [-0.3768541784373798341, -0.4190485384650235847, 0.5958971899345203651, - 1.201292917640018342, -0.2406155214817796814, -0.1324849200097359183, - 0.01328099434291760409, -0.581962897607930672, -0.5905963672681562759, - -0.9211015102408774548, -1.368822998145939182, 0.8509696368242991619]; - -const OUT_GRAD: [f64; 12] = - [-2.403764079434107295, 3.555336738840519548, -2.288944264898976203, - 1.969619340429111837, 2.804058445190456017, 1.407220298754862102, - -3.347891193465470093, 2.189872108671865896, 1.427670874053681487, - 0.2996809826406714856, -0.937226079977424, 0.5226855345859900333]; - -const SOFTMAX_OUT: [f64; 12] = - [0.05171473742304219928, 0.04957806199520956396, 0.1367964652178650133, - 0.2506079819996082626, 0.05926278008468241267, 0.06603018961553208981, - 0.07639199935558898925, 0.042124692050059925, 0.04176257521092115797, - 0.0300089438519342605, 0.01917819540548354381, 0.1765433777900725818]; - -const SOFTMAX_IN_GRAD: [f64; 12] = - [-0.1486650380754509047, 0.1529179607757129824, -0.3775436589037359082, - 0.3755787205930539972, 0.1382665450812886289, 0.06182216371989694205, - -0.2917288464701554001, 0.07240910171835037837, 0.03995516437097136814, - -0.005139575801680075536, -0.02700625907621399834, 0.009133722067961989762]; - -const LOG_SOFTMAX_OUT: [f64; 12] = - [-2.962012481550115532, -3.004206841577759283, -1.989261113178215333, - -1.383865385472717357, -2.82577382459451538, -2.717643223122471617, - -2.571877308769818094, -3.16712120072066637, -3.175754670380891974, - -3.506259813353613153, -3.953981301258674881, -1.734188666288436536]; - -const LOG_SOFTMAX_IN_GRAD: [f64; 12] = - [-2.672593766324700871, 3.297614171793778065, -3.000055888872397954, - 0.6668791798802118677, 2.495991626942767485, 1.063974328955915138, - -3.745001152657859367, 1.97089453402919622, 1.210575698149443993, - 0.1436849284861279457, -1.036920451889396308, -0.3950432084930862146]; - +const IN: [f64; 12] = [ + -0.3768541784373798341, + -0.4190485384650235847, + 0.5958971899345203651, + 1.201292917640018342, + -0.2406155214817796814, + -0.1324849200097359183, + 0.01328099434291760409, + -0.581962897607930672, + -0.5905963672681562759, + -0.9211015102408774548, + -1.368822998145939182, + 0.8509696368242991619, +]; + +const OUT_GRAD: [f64; 12] = [ + -2.403764079434107295, + 3.555336738840519548, + -2.288944264898976203, + 1.969619340429111837, + 2.804058445190456017, + 1.407220298754862102, + -3.347891193465470093, + 2.189872108671865896, + 1.427670874053681487, + 0.2996809826406714856, + -0.937226079977424, + 0.5226855345859900333, +]; + +const SOFTMAX_OUT: [f64; 12] = [ + 0.05171473742304219928, + 0.04957806199520956396, + 0.1367964652178650133, + 0.2506079819996082626, + 0.05926278008468241267, + 0.06603018961553208981, + 0.07639199935558898925, + 0.042124692050059925, + 0.04176257521092115797, + 0.0300089438519342605, + 0.01917819540548354381, + 0.1765433777900725818, +]; + +const SOFTMAX_IN_GRAD: [f64; 12] = [ + -0.1486650380754509047, + 0.1529179607757129824, + -0.3775436589037359082, + 0.3755787205930539972, + 0.1382665450812886289, + 0.06182216371989694205, + -0.2917288464701554001, + 0.07240910171835037837, + 0.03995516437097136814, + -0.005139575801680075536, + -0.02700625907621399834, + 0.009133722067961989762, +]; + +const LOG_SOFTMAX_OUT: [f64; 12] = [ + 
-2.962012481550115532, + -3.004206841577759283, + -1.989261113178215333, + -1.383865385472717357, + -2.82577382459451538, + -2.717643223122471617, + -2.571877308769818094, + -3.16712120072066637, + -3.175754670380891974, + -3.506259813353613153, + -3.953981301258674881, + -1.734188666288436536, +]; + +const LOG_SOFTMAX_IN_GRAD: [f64; 12] = [ + -2.672593766324700871, + 3.297614171793778065, + -3.000055888872397954, + 0.6668791798802118677, + 2.495991626942767485, + 1.063974328955915138, + -3.745001152657859367, + 1.97089453402919622, + 1.210575698149443993, + 0.1436849284861279457, + -1.036920451889396308, + -0.3950432084930862146, +]; pub fn test_softmax(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: Softmax + IBackend { - - let x = filled_tensor(&backend, &DIMS, &IN); +where + T: Float + Epsilon + fmt::Debug, + Backend: Softmax + IBackend, +{ + let x = filled_tensor(&backend, &DIMS, &IN); let mut r = SharedTensor::::new(&DIMS); backend.softmax(&x, &mut r).unwrap(); @@ -57,10 +111,11 @@ pub fn test_softmax(backend: Backend) } pub fn test_softmax_grad(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: Softmax + IBackend { - - let x = filled_tensor(&backend, &DIMS, &SOFTMAX_OUT); +where + T: Float + Epsilon + fmt::Debug, + Backend: Softmax + IBackend, +{ + let x = filled_tensor(&backend, &DIMS, &SOFTMAX_OUT); let dx = filled_tensor(&backend, &DIMS, &OUT_GRAD); let mut dr = SharedTensor::new(&DIMS); @@ -69,10 +124,11 @@ pub fn test_softmax_grad(backend: Backend) } pub fn test_log_softmax(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: LogSoftmax + IBackend { - - let x = filled_tensor(&backend, &DIMS, &IN); +where + T: Float + Epsilon + fmt::Debug, + Backend: LogSoftmax + IBackend, +{ + let x = filled_tensor(&backend, &DIMS, &IN); let mut r = SharedTensor::::new(&DIMS); backend.log_softmax(&x, &mut r).unwrap(); @@ -80,10 +136,11 @@ pub fn test_log_softmax(backend: Backend) } pub fn test_log_softmax_grad(backend: Backend) - where T: Float + Epsilon + fmt::Debug, - Backend: LogSoftmax + IBackend { - - let x = filled_tensor(&backend, &DIMS, &LOG_SOFTMAX_OUT); +where + T: Float + Epsilon + fmt::Debug, + Backend: LogSoftmax + IBackend, +{ + let x = filled_tensor(&backend, &DIMS, &LOG_SOFTMAX_OUT); let dx = filled_tensor(&backend, &DIMS, &OUT_GRAD); let mut dr = SharedTensor::new(&DIMS); @@ -91,11 +148,14 @@ pub fn test_log_softmax_grad(backend: Backend) tensor_assert_eq(&dr, &LOG_SOFTMAX_IN_GRAD, 10.0); } -pub fn cross_test_log_softmax_grad(backend_a: Backend, backend_b: Backend) - where Backend: LogSoftmax + IBackend, - Backend: LogSoftmax + IBackend { - - let x = filled_tensor(&backend_a, &DIMS, &LOG_SOFTMAX_OUT); +pub fn cross_test_log_softmax_grad( + backend_a: Backend, + backend_b: Backend, +) where + Backend: LogSoftmax + IBackend, + Backend: LogSoftmax + IBackend, +{ + let x = filled_tensor(&backend_a, &DIMS, &LOG_SOFTMAX_OUT); let dx = filled_tensor(&backend_a, &DIMS, &OUT_GRAD); let mut dr_a = SharedTensor::new(&DIMS); let mut dr_b = SharedTensor::new(&DIMS); @@ -119,7 +179,11 @@ mod native { test_native!(test_softmax, softmax_f32, softmax_f64); test_native!(test_softmax_grad, softmax_grad_f32, softmax_grad_f64); test_native!(test_log_softmax, log_softmax_f32, log_softmax_f64); - test_native!(test_log_softmax_grad, log_softmax_grad_f32, log_softmax_grad_f64); + test_native!( + test_log_softmax_grad, + log_softmax_grad_f32, + log_softmax_grad_f64 + ); } mod cuda { @@ -127,5 +191,9 @@ mod cuda { test_cuda!(test_softmax, 
softmax_f32, softmax_f64); test_cuda!(test_softmax_grad, softmax_grad_f32, softmax_grad_f64); test_cuda!(test_log_softmax, log_softmax_f32, log_softmax_f64); - test_cuda!(test_log_softmax_grad, log_softmax_grad_f32, log_softmax_grad_f64); + test_cuda!( + test_log_softmax_grad, + log_softmax_grad_f32, + log_softmax_grad_f64 + ); } diff --git a/coaster/benches/shared_tensor.rs b/coaster/benches/shared_tensor.rs index cc7067cf6..eb49db98c 100644 --- a/coaster/benches/shared_tensor.rs +++ b/coaster/benches/shared_tensor.rs @@ -1,21 +1,21 @@ #![feature(test)] -extern crate test; extern crate coaster as co; extern crate rand; +extern crate test; -use test::Bencher; -use crate::co::device::IDevice; use crate::co::backend::{Backend, BackendConfig}; +use crate::co::device::IDevice; use crate::co::framework::IFramework; use crate::co::tensor::SharedTensor; +use test::Bencher; +#[cfg(feature = "cuda")] +use crate::co::frameworks::Cuda; #[cfg(feature = "native")] use crate::co::frameworks::Native; #[cfg(feature = "opencl")] use co::frameworks::OpenCL; -#[cfg(feature = "cuda")] -use crate::co::frameworks::Cuda; #[cfg(feature = "native")] fn native_backend() -> Backend { @@ -36,12 +36,15 @@ fn opencl_backend() -> Backend { #[cfg(feature = "cuda")] use crate::co::frameworks::cuda::get_cuda_backend as cuda_backend; -fn sync_back_and_forth(b: &mut Bencher, - backend1: Backend, backend2: Backend, - mem_size: usize) - where F1: co::IFramework + Clone, - F2: co::IFramework + Clone { - +fn sync_back_and_forth( + b: &mut Bencher, + backend1: Backend, + backend2: Backend, + mem_size: usize, +) where + F1: co::IFramework + Clone, + F2: co::IFramework + Clone, +{ let dev1 = backend1.device(); let dev2 = backend2.device(); @@ -58,12 +61,15 @@ fn sync_back_and_forth(b: &mut Bencher, }); } -fn unidirectional_sync(b: &mut Bencher, - src_backend: Backend, dst_backend: Backend, - mem_size: usize) - where F1: co::IFramework + Clone, - F2: co::IFramework + Clone { - +fn unidirectional_sync( + b: &mut Bencher, + src_backend: Backend, + dst_backend: Backend, + mem_size: usize, +) where + F1: co::IFramework + Clone, + F2: co::IFramework + Clone, +{ let src_dev = src_backend.device(); let dst_dev = dst_backend.device(); @@ -82,18 +88,13 @@ fn unidirectional_sync(b: &mut Bencher, #[cfg(feature = "native")] #[cfg(feature = "opencl")] mod opencl_and_native { - use test::Bencher; - use co::device::{IDevice}; + use super::{native_backend, opencl_backend, sync_back_and_forth, unidirectional_sync}; + use co::device::IDevice; use co::frameworks::opencl; - use super::{native_backend, opencl_backend, - sync_back_and_forth, unidirectional_sync}; + use test::Bencher; #[inline(never)] - fn bench_256_alloc_1mb_opencl_profile( - b: &mut Bencher, - device: &opencl::Context, - size: usize - ) { + fn bench_256_alloc_1mb_opencl_profile(b: &mut Bencher, device: &opencl::Context, size: usize) { b.iter(|| { for _ in 0..256 { device.alloc_memory(size).unwrap(); @@ -154,13 +155,11 @@ mod opencl_and_native { } } - #[cfg(feature = "native")] #[cfg(feature = "cuda")] mod cuda_and_native { + use super::{cuda_backend, native_backend, sync_back_and_forth, unidirectional_sync}; use test::Bencher; - use super::{native_backend, cuda_backend, - sync_back_and_forth, unidirectional_sync}; #[bench] fn bench_sync_1kb_native_cuda_back_and_forth(b: &mut Bencher) { diff --git a/coaster/examples/readme.rs b/coaster/examples/readme.rs index e8acd2e89..78c0002f4 100644 --- a/coaster/examples/readme.rs +++ b/coaster/examples/readme.rs @@ -3,13 +3,13 @@ use coaster_nn as 
nn; #[cfg(feature = "cuda")] use rcublas; -use nn::*; -use co::prelude::*; -use co::frameworks::native::flatbox::FlatBox; #[cfg(feature = "cuda")] use co::frameworks::cuda::get_cuda_backend; +use co::frameworks::native::flatbox::FlatBox; #[cfg(not(feature = "cuda"))] use co::frameworks::native::get_native_backend; +use co::prelude::*; +use nn::*; fn write_to_memory(mem: &mut FlatBox, data: &[T]) { let mem_buffer = mem.as_mut_slice::(); @@ -29,14 +29,16 @@ fn main() { let mut x = SharedTensor::::new(&(1, 1, 3)); // let mut result = SharedTensor::::new(&(1, 1, 3)); // Fill `x` with some data. - let payload: &[f32] = &::std::iter::repeat(1f32).take(x.capacity()).collect::>(); + let payload: &[f32] = &::std::iter::repeat(1f32) + .take(x.capacity()) + .collect::>(); let native = Backend::::default().unwrap(); write_to_memory(x.write_only(native.device()).unwrap(), payload); // Write to native host memory. - // Run the sigmoid operation, provided by the NN Plugin, on your CUDA enabled GPU. - // FIXME: Sigmoid cannot be included from coaster-nn without using cuda and native features - // from coaster-nn. This causes the error https://github.com/rust-lang/cargo/issues/6915 , - // and so sigmoid has been disabled for now. - // backend.sigmoid(&mut x, &mut result).unwrap(); - // See the result. - // println!("{:?}", result.read(native.device()).unwrap().as_slice::()); + // Run the sigmoid operation, provided by the NN Plugin, on your CUDA enabled GPU. + // FIXME: Sigmoid cannot be included from coaster-nn without using cuda and native features + // from coaster-nn. This causes the error https://github.com/rust-lang/cargo/issues/6915 , + // and so sigmoid has been disabled for now. + // backend.sigmoid(&mut x, &mut result).unwrap(); + // See the result. + // println!("{:?}", result.read(native.device()).unwrap().as_slice::()); } diff --git a/coaster/src/backend.rs b/coaster/src/backend.rs index 01293283c..15aec3104 100644 --- a/coaster/src/backend.rs +++ b/coaster/src/backend.rs @@ -40,9 +40,9 @@ //! } //! ``` +use crate::device::IDevice; use crate::error::Error; use crate::framework::IFramework; -use crate::device::IDevice; #[derive(Debug, Clone)] /// Defines the main and highest struct of Coaster. @@ -67,12 +67,10 @@ impl Backend { /// Initialize a new native Backend from a BackendConfig. pub fn new(config: BackendConfig) -> Result, Error> { let device = config.framework.new_device(config.hardwares)?; - Ok( - Backend { - framework: Box::new(config.framework), - device, - } - ) + Ok(Backend { + framework: Box::new(config.framework), + device, + }) } /// Returns the available hardware. @@ -95,8 +93,9 @@ impl Backend { /// /// Serves as a marker trait and helps for extern implementation. pub trait IBackend - where <::F as IFramework>::D : IDevice { - +where + <::F as IFramework>::D: IDevice, +{ /// Represents the Framework of a Backend. type F: IFramework + Clone; @@ -104,7 +103,10 @@ pub trait IBackend fn device(&self) -> &<::F as IFramework>::D; /// Try to create a default backend. - fn default() -> Result, Error> where Self: Sized { + fn default() -> Result, Error> + where + Self: Sized, + { let hw_framework = Self::F::new(); let hardwares = hw_framework.hardwares(); let framework = Self::F::new(); // dirty dirty hack to get around borrowing @@ -113,7 +115,9 @@ pub trait IBackend } /// Synchronize backend. 
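The default implementation in the hunk below is a deliberate no-op: host-only backends have nothing to flush, while device backends override it to block until previously enqueued asynchronous work completes. The coaster-nn benches above rely on that contract; a hedged usage sketch (tensor setup elided, `sigmoid` standing in for any plugin call as in those benches):

// Enqueue asynchronous device work, then block before timing or reading
// results, so the measurement observes completed kernels rather than a
// half-drained queue.
backend.sigmoid(&x, &mut r).unwrap();
backend.synchronize().unwrap();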
- fn synchronize(&self) -> Result<(), crate::framework::Error> { Ok(()) } + fn synchronize(&self) -> Result<(), crate::framework::Error> { + Ok(()) + } } #[derive(Debug, Clone)] diff --git a/coaster/src/device.rs b/coaster/src/device.rs index efed95d1f..8da1fc592 100644 --- a/coaster/src/device.rs +++ b/coaster/src/device.rs @@ -7,22 +7,23 @@ //! [backend]: ../backend/index.html use std::any::Any; -use crate::hardware::IHardware; +#[cfg(feature = "cuda")] +use crate::frameworks::cuda::DriverError as CudaError; #[cfg(feature = "native")] use crate::frameworks::native::Error as NativeError; +use crate::hardware::IHardware; #[cfg(feature = "opencl")] use frameworks::opencl::Error as OpenCLError; -#[cfg(feature = "cuda")] -use crate::frameworks::cuda::DriverError as CudaError; -use std::{fmt, error}; +use std::{error, fmt}; /// Marker trait for backing memory. -pub trait IMemory { } +pub trait IMemory {} /// Specifies Hardware behavior across frameworks. pub trait IDevice - where Self: Any + Clone + Eq + Any + MemorySync { - +where + Self: Any + Clone + Eq + Any + MemorySync, +{ /// The Hardware representation for this Device. type H: IHardware; /// The Memory representation for this Device. @@ -41,11 +42,19 @@ pub trait IDevice /// so that base crate knows nothing about it at all. pub trait MemorySync { /// FIXME - fn sync_in(&self, my_memory: &mut dyn Any, src_device: &dyn Any, src_memory: &dyn Any) - -> Result<(), Error>; + fn sync_in( + &self, + my_memory: &mut dyn Any, + src_device: &dyn Any, + src_memory: &dyn Any, + ) -> Result<(), Error>; /// FIXME - fn sync_out(&self, my_memory: &dyn Any, dst_device: &dyn Any, dst_memory: &mut dyn Any) - -> Result<(), Error>; + fn sync_out( + &self, + my_memory: &dyn Any, + dst_device: &dyn Any, + dst_memory: &mut dyn Any, + ) -> Result<(), Error>; } #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -81,7 +90,7 @@ impl fmt::Display for Error { #[cfg(feature = "opencl")] Error::OpenCL(ref err) => format!("OpenCL error: {}", err.to_string()), #[cfg(feature = "cuda")] - Error::Cuda(ref err) => format!("Cuda error: {}", err.to_string()) + Error::Cuda(ref err) => format!("Cuda error: {}", err.to_string()), }; write!(f, "{}", msg) diff --git a/coaster/src/framework.rs b/coaster/src/framework.rs index a3527f65b..4993739f0 100644 --- a/coaster/src/framework.rs +++ b/coaster/src/framework.rs @@ -18,13 +18,13 @@ //! // let backend: Backend = framework.create_backend(); //! ``` -use crate::hardware::IHardware; -use crate::device::IDevice; use crate::binary::IBinary; -#[cfg(feature = "opencl")] -use frameworks::opencl::Error as OpenCLError; +use crate::device::IDevice; #[cfg(feature = "cuda")] use crate::frameworks::cuda::DriverError as CudaError; +use crate::hardware::IHardware; +#[cfg(feature = "opencl")] +use frameworks::opencl::Error as OpenCLError; use std::error; use std::fmt; @@ -47,7 +47,9 @@ pub trait IFramework { /// Initializes a new Framework. /// /// Loads all the available hardwares - fn new() -> Self where Self: Sized; + fn new() -> Self + where + Self: Sized; /// Initializes all the available hardwares. fn load_hardwares() -> Result, Error>; diff --git a/coaster/src/frameworks/cuda/api/driver/context.rs b/coaster/src/frameworks/cuda/api/driver/context.rs index d8ce00f80..58adff213 100644 --- a/coaster/src/frameworks/cuda/api/driver/context.rs +++ b/coaster/src/frameworks/cuda/api/driver/context.rs @@ -2,9 +2,9 @@ //! //! A Coaster device can be understood as a synonym to Cuda's context. 
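The remaining hunks in this file rework every fall-through `status =>` arm to forward the raw `CUresult` into `Error::Unknown` instead of discarding it, which is the substance of this refactor. A minimal sketch of the variant shape those call sites imply, assuming the thiserror derive introduced by this patch (the exact attribute text in error.rs may differ):

use thiserror::Error;

#[derive(Debug, Error)]
pub enum Error {
    /// Catch-all for driver statuses that have no dedicated variant;
    /// the second field keeps the raw CUresult value so it is never lost.
    #[error("{0} (CUDA driver error code: {1})")]
    Unknown(&'static str, u64),
}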
-use super::{API, Error}; -use crate::frameworks::cuda::Device; use super::ffi::*; +use super::{Error, API}; +use crate::frameworks::cuda::Device; use std::ptr; impl API { @@ -15,14 +15,14 @@ impl API { /// and for executing kernels on one or more devices specified in the context. /// An Cuda context is a synonym to a Coaster device. pub fn create_context(device: Device) -> Result { - unsafe {API::ffi_create_context(device.id_c())} + unsafe { API::ffi_create_context(device.id_c()) } } /// Removes a created Cuda context from the device. /// /// Should be called when freeing a Cuda::Context to not trash up the Cuda device. pub fn destroy_context(context: CUcontext) -> Result<(), Error> { - unsafe {API::ffi_destroy_context(context)} + unsafe { API::ffi_destroy_context(context) } } /// Synchronize the CUDA context associated with the current CPU thread. @@ -30,47 +30,83 @@ impl API { /// Should be called when you want to make sure that previous asynchronous operations /// have been executed. pub fn synchronize_context() -> Result<(), Error> { - unsafe {API::ffi_synchronize_context()} + unsafe { API::ffi_synchronize_context() } } - unsafe fn ffi_create_context( - dev: CUdevice, - ) -> Result { + unsafe fn ffi_create_context(dev: CUdevice) -> Result { let mut context: CUcontext = ptr::null_mut(); match cuCtxCreate_v2(&mut context, CU_CTX_SCHED_BLOCKING_SYNC, dev) { CUresult::CUDA_SUCCESS => Ok(context), - CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")), - CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")), - CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")), - CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidValue("Invalid value for `device` provided.")), - CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")), - CUresult::CUDA_ERROR_OUT_OF_MEMORY => Err(Error::OutOfMemory("Device is out of memory.")), - status @ CUresult::CUDA_ERROR_UNKNOWN => Err(Error::Unknown("An unknown Error occured. Check the CUDA DRIVER API manual for more details.", status as i32 as u64)), - status => Err(Error::Unknown("Unable to create Cuda context.", status as i32 as u64)), + CUresult::CUDA_ERROR_DEINITIALIZED => { + Err(Error::Deinitialized("CUDA got deinitialized.")) + } + CUresult::CUDA_ERROR_NOT_INITIALIZED => { + Err(Error::NotInitialized("CUDA is not initialized.")) + } + CUresult::CUDA_ERROR_INVALID_CONTEXT => { + Err(Error::InvalidContext("No valid context available.")) + } + CUresult::CUDA_ERROR_INVALID_DEVICE => { + Err(Error::InvalidValue("Invalid value for `device` provided.")) + } + CUresult::CUDA_ERROR_INVALID_VALUE => { + Err(Error::InvalidValue("Invalid value provided.")) + } + CUresult::CUDA_ERROR_OUT_OF_MEMORY => { + Err(Error::OutOfMemory("Device is out of memory.")) + } + status @ CUresult::CUDA_ERROR_UNKNOWN => Err(Error::Unknown( + "An unknown Error occured. 
Check the CUDA DRIVER API manual for more details.", + status as i32 as u64, + )), + status => Err(Error::Unknown( + "Unable to create Cuda context.", + status as i32 as u64, + )), } } - unsafe fn ffi_destroy_context ( - ctx: CUcontext, - ) -> Result<(), Error> { + unsafe fn ffi_destroy_context(ctx: CUcontext) -> Result<(), Error> { match cuCtxDestroy_v2(ctx) { CUresult::CUDA_SUCCESS => Ok(()), - CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")), - CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")), - CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")), - CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")), - status => Err(Error::Unknown("Unable to destroy Cuda context.", status as i32 as u64)), + CUresult::CUDA_ERROR_DEINITIALIZED => { + Err(Error::Deinitialized("CUDA got deinitialized.")) + } + CUresult::CUDA_ERROR_NOT_INITIALIZED => { + Err(Error::NotInitialized("CUDA is not initialized.")) + } + CUresult::CUDA_ERROR_INVALID_CONTEXT => { + Err(Error::InvalidContext("No valid context available.")) + } + CUresult::CUDA_ERROR_INVALID_VALUE => { + Err(Error::InvalidValue("Invalid value provided.")) + } + status => Err(Error::Unknown( + "Unable to destroy Cuda context.", + status as i32 as u64, + )), } } - unsafe fn ffi_synchronize_context () -> Result<(), Error> { + unsafe fn ffi_synchronize_context() -> Result<(), Error> { match cuCtxSynchronize() { CUresult::CUDA_SUCCESS => Ok(()), - CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")), - CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")), - CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")), - CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")), - status => Err(Error::Unknown("Unable to synchronize CUDA context.", status as i32 as u64)), + CUresult::CUDA_ERROR_DEINITIALIZED => { + Err(Error::Deinitialized("CUDA got deinitialized.")) + } + CUresult::CUDA_ERROR_NOT_INITIALIZED => { + Err(Error::NotInitialized("CUDA is not initialized.")) + } + CUresult::CUDA_ERROR_INVALID_CONTEXT => { + Err(Error::InvalidContext("No valid context available.")) + } + CUresult::CUDA_ERROR_INVALID_VALUE => { + Err(Error::InvalidValue("Invalid value provided.")) + } + status => Err(Error::Unknown( + "Unable to synchronize CUDA context.", + status as i32 as u64, + )), } } } diff --git a/coaster/src/frameworks/cuda/api/driver/device.rs b/coaster/src/frameworks/cuda/api/driver/device.rs index 4e8200324..14a1a907c 100644 --- a/coaster/src/frameworks/cuda/api/driver/device.rs +++ b/coaster/src/frameworks/cuda/api/driver/device.rs @@ -1,9 +1,9 @@ //! Provides the Cuda API with its device functionality. +use super::ffi::*; +use super::{Error, API}; use crate::frameworks::cuda::{Device, DeviceInfo}; use byteorder::{LittleEndian, WriteBytesExt}; -use super::ffi::*; -use super::{API, Error}; impl API { /// Returns fully initialized devices available through Cuda. @@ -12,34 +12,35 @@ impl API { /// information. 
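A note on the conversion used throughout these arms: `CUresult` is a fieldless C-style enum, so `status as i32 as u64` first takes its integer discriminant and then widens it into the payload of `Error::Unknown`. A hypothetical helper expressing the same lowering (not part of this patch; assumes the surrounding module's `CUresult` is in scope):

fn status_code(status: CUresult) -> u64 {
    // The discriminant as the C ABI sees it, widened to the error payload type.
    status as i32 as u64
}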
     pub fn load_devices() -> Result<Vec<Device>, Error> {
         match API::load_device_list() {
-            Ok(device_list) => {
-                Ok(
-                    device_list.into_iter().map(|mut device| {
-                        device
-                            .load_name()
-                            .load_device_type()
-                            .load_compute_units()
-                    }).collect()
-                )
-            },
-            Err(err) => Err(err)
+            Ok(device_list) => Ok(device_list
+                .into_iter()
+                .map(|mut device| device.load_name().load_device_type().load_compute_units())
+                .collect()),
+            Err(err) => Err(err),
         }
     }

     /// Returns a list of available devices for the provided platform.
     pub fn load_device_list() -> Result<Vec<Device>, Error> {
         let mut device_counter = 0;
-        unsafe {API::ffi_device_get_count(&mut device_counter)}?;
+        unsafe { API::ffi_device_get_count(&mut device_counter) }?;

-        Ok((0..device_counter).collect::<Vec<i32>>().iter().map(|ordinal| {
-            let mut device_id: CUdevice = 0;
-            let _ = unsafe { API::ffi_device_get(&mut device_id, *ordinal) };
-            Device::from_isize(device_id as isize)
-        }).collect::<Vec<Device>>())
+        Ok((0..device_counter)
+            .collect::<Vec<i32>>()
+            .iter()
+            .map(|ordinal| {
+                let mut device_id: CUdevice = 0;
+                let _ = unsafe { API::ffi_device_get(&mut device_id, *ordinal) };
+                Device::from_isize(device_id as isize)
+            })
+            .collect::<Vec<Device>>())
     }

     /// Returns the requested DeviceInfo for the provided device.
-    pub fn load_device_info(device: &Device, info: CUdevice_attribute) -> Result<DeviceInfo, Error> {
+    pub fn load_device_info(
+        device: &Device,
+        info: CUdevice_attribute,
+    ) -> Result<DeviceInfo, Error> {
         match info {
             CUdevice_attribute::CU_DEVICE_NAME => {
                 let mut name: [i8; 1024] = [0; 1024];
@@ -48,15 +49,19 @@ impl API {
                 // Removes obsolete whitespaces.
                 for (i, char) in name.iter().enumerate() {
                     match *char {
-                        0 => if i > 1 && name[i-1] != 0 { buf.push(*char as u8) },
-                        _ => buf.push(*char as u8)
+                        0 => {
+                            if i > 1 && name[i - 1] != 0 {
+                                buf.push(*char as u8)
+                            }
+                        }
+                        _ => buf.push(*char as u8),
                     }
                 }

                 Ok(DeviceInfo::new(buf))
-            },
+            }
             CUdevice_attribute::CU_DEVICE_MEMORY_TOTAL => {
                 unimplemented!()
-            },
+            }
             _ => {
                 let mut value: ::libc::c_int = 0;
                 unsafe { API::ffi_device_get_attribute(&mut value, info, device.id_c()) }?;
@@ -67,32 +72,47 @@ impl API {
         }
     }

-    unsafe fn ffi_device_get(
-        device: *mut CUdevice,
-        ordinal: ::libc::c_int,
-    ) -> Result<(), Error> {
+    unsafe fn ffi_device_get(device: *mut CUdevice, ordinal: ::libc::c_int) -> Result<(), Error> {
         match cuDeviceGet(device, ordinal) {
             CUresult::CUDA_SUCCESS => Ok(()),
-            CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")),
-            CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
-            CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
-            CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
-            status => Err(Error::Unknown("Unable to get Device count.", status as i32 as u64)),
-
+            CUresult::CUDA_ERROR_DEINITIALIZED => {
+                Err(Error::Deinitialized("CUDA got deinitialized."))
+            }
+            CUresult::CUDA_ERROR_NOT_INITIALIZED => {
+                Err(Error::NotInitialized("CUDA is not initialized."))
+            }
+            CUresult::CUDA_ERROR_INVALID_CONTEXT => {
+                Err(Error::InvalidContext("No valid context available."))
+            }
+            CUresult::CUDA_ERROR_INVALID_VALUE => {
+                Err(Error::InvalidValue("Invalid value provided."))
+            }
+            status => Err(Error::Unknown(
+                "Unable to get Device count.",
+                status as i32 as u64,
+            )),
         }
     }

-    unsafe fn ffi_device_get_count(
-        count: *mut ::libc::c_int
-    ) -> Result<(), Error> {
+    unsafe fn ffi_device_get_count(count: *mut ::libc::c_int) -> Result<(), Error> {
         match cuDeviceGetCount(count) {
             CUresult::CUDA_SUCCESS => Ok(()),
-            CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")),
-            CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
-            CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
-            CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
-            status => Err(Error::Unknown("Unable to get Device count.", status as i32 as u64)),
-
+            CUresult::CUDA_ERROR_DEINITIALIZED => {
+                Err(Error::Deinitialized("CUDA got deinitialized."))
+            }
+            CUresult::CUDA_ERROR_NOT_INITIALIZED => {
+                Err(Error::NotInitialized("CUDA is not initialized."))
+            }
+            CUresult::CUDA_ERROR_INVALID_CONTEXT => {
+                Err(Error::InvalidContext("No valid context available."))
+            }
+            CUresult::CUDA_ERROR_INVALID_VALUE => {
+                Err(Error::InvalidValue("Invalid value provided."))
+            }
+            status => Err(Error::Unknown(
+                "Unable to get Device count.",
+                status as i32 as u64,
+            )),
         }
     }

@@ -103,13 +123,25 @@ impl API {
     ) -> Result<(), Error> {
         match cuDeviceGetAttribute(pi, attrib, device) {
             CUresult::CUDA_SUCCESS => Ok(()),
-            CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")),
-            CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
-            CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
-            CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
-            CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidValue("Invalid value for `device` provided.")),
-            status => Err(Error::Unknown("Unable to get device attribute.", status as i32 as u64)),
-
+            CUresult::CUDA_ERROR_DEINITIALIZED => {
+                Err(Error::Deinitialized("CUDA got deinitialized."))
+            }
+            CUresult::CUDA_ERROR_NOT_INITIALIZED => {
+                Err(Error::NotInitialized("CUDA is not initialized."))
+            }
+            CUresult::CUDA_ERROR_INVALID_CONTEXT => {
+                Err(Error::InvalidContext("No valid context available."))
+            }
+            CUresult::CUDA_ERROR_INVALID_VALUE => {
+                Err(Error::InvalidValue("Invalid value provided."))
+            }
+            CUresult::CUDA_ERROR_INVALID_DEVICE => {
+                Err(Error::InvalidValue("Invalid value for `device` provided."))
+            }
+            status => Err(Error::Unknown(
+                "Unable to get device attribute.",
+                status as i32 as u64,
+            )),
         }
     }

@@ -120,28 +152,50 @@ impl API {
     ) -> Result<(), Error> {
         match cuDeviceGetName(name, len, device) {
             CUresult::CUDA_SUCCESS => Ok(()),
-            CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")),
-            CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
-            CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
-            CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
-            CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidValue("Invalid value for `device` provided.")),
-            status => Err(Error::Unknown("Unable to get device name.", status as i32 as u64)),
-
+            CUresult::CUDA_ERROR_DEINITIALIZED => {
+                Err(Error::Deinitialized("CUDA got deinitialized."))
+            }
+            CUresult::CUDA_ERROR_NOT_INITIALIZED => {
+                Err(Error::NotInitialized("CUDA is not initialized."))
+            }
+            CUresult::CUDA_ERROR_INVALID_CONTEXT => {
+                Err(Error::InvalidContext("No valid context available."))
+            }
+            CUresult::CUDA_ERROR_INVALID_VALUE => {
+                Err(Error::InvalidValue("Invalid value provided."))
+            }
+            CUresult::CUDA_ERROR_INVALID_DEVICE => {
Err(Error::InvalidValue("Invalid value for `device` provided.")) + } + status => Err(Error::Unknown( + "Unable to get device name.", + status as i32 as u64, + )), } } - unsafe fn ffi_device_total_mem( - bytes: *mut size_t, - device: CUdevice, - ) -> Result<(), Error> { + unsafe fn ffi_device_total_mem(bytes: *mut size_t, device: CUdevice) -> Result<(), Error> { match cuDeviceTotalMem_v2(bytes, device) { CUresult::CUDA_SUCCESS => Ok(()), - CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")), - CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")), - CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")), - CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")), - CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidValue("Invalid value for `device` provided.")), - status => Err(Error::Unknown("Unable to get total mem of device.", status as i32 as u64)) + CUresult::CUDA_ERROR_DEINITIALIZED => { + Err(Error::Deinitialized("CUDA got deinitialized.")) + } + CUresult::CUDA_ERROR_NOT_INITIALIZED => { + Err(Error::NotInitialized("CUDA is not initialized.")) + } + CUresult::CUDA_ERROR_INVALID_CONTEXT => { + Err(Error::InvalidContext("No valid context available.")) + } + CUresult::CUDA_ERROR_INVALID_VALUE => { + Err(Error::InvalidValue("Invalid value provided.")) + } + CUresult::CUDA_ERROR_INVALID_DEVICE => { + Err(Error::InvalidValue("Invalid value for `device` provided.")) + } + status => Err(Error::Unknown( + "Unable to get total mem of device.", + status as i32 as u64, + )), } } } diff --git a/coaster/src/frameworks/cuda/api/driver/ffi.rs b/coaster/src/frameworks/cuda/api/driver/ffi.rs index 13f20d7a8..100a96307 100644 --- a/coaster/src/frameworks/cuda/api/driver/ffi.rs +++ b/coaster/src/frameworks/cuda/api/driver/ffi.rs @@ -35,10 +35,14 @@ pub struct Struct_Unnamed1 { pub __val: [::libc::c_int; 2usize], } impl ::std::clone::Clone for Struct_Unnamed1 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_Unnamed1 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type __fsid_t = Struct_Unnamed1; pub type __clock_t = ::libc::c_long; @@ -88,10 +92,14 @@ impl Union_wait { } } impl ::std::clone::Clone for Union_wait { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Union_wait { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } #[repr(C)] #[derive(Copy)] @@ -99,10 +107,14 @@ pub struct Struct_Unnamed2 { pub _bindgen_bitfield_1_: ::libc::c_uint, } impl ::std::clone::Clone for Struct_Unnamed2 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_Unnamed2 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } #[repr(C)] #[derive(Copy)] @@ -110,10 +122,14 @@ pub struct Struct_Unnamed3 { pub _bindgen_bitfield_1_: ::libc::c_uint, } impl ::std::clone::Clone for Struct_Unnamed3 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_Unnamed3 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } #[repr(C)] 
#[derive(Copy)] @@ -131,10 +147,14 @@ impl Union_Unnamed4 { } } impl ::std::clone::Clone for Union_Unnamed4 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Union_Unnamed4 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type __WAIT_STATUS = Union_Unnamed4; #[repr(C)] @@ -144,10 +164,14 @@ pub struct Struct_Unnamed5 { pub rem: ::libc::c_int, } impl ::std::clone::Clone for Struct_Unnamed5 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_Unnamed5 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type div_t = Struct_Unnamed5; #[repr(C)] @@ -157,10 +181,14 @@ pub struct Struct_Unnamed6 { pub rem: ::libc::c_long, } impl ::std::clone::Clone for Struct_Unnamed6 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_Unnamed6 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type ldiv_t = Struct_Unnamed6; #[repr(C)] @@ -170,10 +198,14 @@ pub struct Struct_Unnamed7 { pub rem: ::libc::c_longlong, } impl ::std::clone::Clone for Struct_Unnamed7 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_Unnamed7 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type lldiv_t = Struct_Unnamed7; pub type u_char = __u_char; @@ -220,10 +252,14 @@ pub struct Struct_Unnamed8 { pub __val: [::libc::c_ulong; 16usize], } impl ::std::clone::Clone for Struct_Unnamed8 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_Unnamed8 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type __sigset_t = Struct_Unnamed8; pub type sigset_t = __sigset_t; @@ -234,10 +270,14 @@ pub struct Struct_timespec { pub tv_nsec: __syscall_slong_t, } impl ::std::clone::Clone for Struct_timespec { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_timespec { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } #[repr(C)] #[derive(Copy)] @@ -246,10 +286,14 @@ pub struct Struct_timeval { pub tv_usec: __suseconds_t, } impl ::std::clone::Clone for Struct_timeval { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_timeval { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type suseconds_t = __suseconds_t; pub type __fd_mask = ::libc::c_long; @@ -259,10 +303,14 @@ pub struct Struct_Unnamed9 { pub __fds_bits: [__fd_mask; 16usize], } impl ::std::clone::Clone for Struct_Unnamed9 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_Unnamed9 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type fd_set = Struct_Unnamed9; pub type fd_mask = __fd_mask; @@ -287,10 +335,14 @@ impl Union_pthread_attr_t { } } impl 
::std::clone::Clone for Union_pthread_attr_t { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Union_pthread_attr_t { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type pthread_attr_t = Union_pthread_attr_t; #[repr(C)] @@ -300,10 +352,14 @@ pub struct Struct___pthread_internal_list { pub __next: *mut Struct___pthread_internal_list, } impl ::std::clone::Clone for Struct___pthread_internal_list { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct___pthread_internal_list { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type __pthread_list_t = Struct___pthread_internal_list; #[repr(C)] @@ -326,10 +382,14 @@ impl Union_Unnamed10 { } } impl ::std::clone::Clone for Union_Unnamed10 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Union_Unnamed10 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } #[repr(C)] #[derive(Copy)] @@ -344,10 +404,14 @@ pub struct Struct___pthread_mutex_s { pub __list: __pthread_list_t, } impl ::std::clone::Clone for Struct___pthread_mutex_s { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct___pthread_mutex_s { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type pthread_mutex_t = Union_Unnamed10; #[repr(C)] @@ -366,10 +430,14 @@ impl Union_Unnamed11 { } } impl ::std::clone::Clone for Union_Unnamed11 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Union_Unnamed11 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type pthread_mutexattr_t = Union_Unnamed11; #[repr(C)] @@ -392,10 +460,14 @@ impl Union_Unnamed12 { } } impl ::std::clone::Clone for Union_Unnamed12 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Union_Unnamed12 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } #[repr(C)] #[derive(Copy)] @@ -410,10 +482,14 @@ pub struct Struct_Unnamed13 { pub __broadcast_seq: ::libc::c_uint, } impl ::std::clone::Clone for Struct_Unnamed13 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_Unnamed13 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type pthread_cond_t = Union_Unnamed12; #[repr(C)] @@ -432,10 +508,14 @@ impl Union_Unnamed14 { } } impl ::std::clone::Clone for Union_Unnamed14 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Union_Unnamed14 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type pthread_condattr_t = Union_Unnamed14; pub type pthread_key_t = ::libc::c_uint; @@ -460,10 +540,14 @@ impl Union_Unnamed15 { } } impl ::std::clone::Clone for Union_Unnamed15 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + 
*self + } } impl ::std::default::Default for Union_Unnamed15 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } #[repr(C)] #[derive(Copy)] @@ -482,10 +566,14 @@ pub struct Struct_Unnamed16 { pub __flags: ::libc::c_uint, } impl ::std::clone::Clone for Struct_Unnamed16 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_Unnamed16 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type pthread_rwlock_t = Union_Unnamed15; #[repr(C)] @@ -504,10 +592,14 @@ impl Union_Unnamed17 { } } impl ::std::clone::Clone for Union_Unnamed17 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Union_Unnamed17 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type pthread_rwlockattr_t = Union_Unnamed17; pub type pthread_spinlock_t = ::libc::c_int; @@ -527,10 +619,14 @@ impl Union_Unnamed18 { } } impl ::std::clone::Clone for Union_Unnamed18 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Union_Unnamed18 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type pthread_barrier_t = Union_Unnamed18; #[repr(C)] @@ -549,10 +645,14 @@ impl Union_Unnamed19 { } } impl ::std::clone::Clone for Union_Unnamed19 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Union_Unnamed19 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type pthread_barrierattr_t = Union_Unnamed19; #[repr(C)] @@ -567,10 +667,14 @@ pub struct Struct_random_data { pub end_ptr: *mut int32_t, } impl ::std::clone::Clone for Struct_random_data { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_random_data { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } #[repr(C)] #[derive(Copy)] @@ -582,38 +686,41 @@ pub struct Struct_drand48_data { pub __a: ::libc::c_ulonglong, } impl ::std::clone::Clone for Struct_drand48_data { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_drand48_data { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } -pub type __compar_fn_t = - ::std::option::Option ::libc::c_int>; +pub type __compar_fn_t = ::std::option::Option< + unsafe extern "C" fn(arg1: *const ::libc::c_void, arg2: *const ::libc::c_void) -> ::libc::c_int, +>; pub type CUdeviceptr = ::libc::c_ulonglong; pub type CUdevice = ::libc::c_int; #[allow(missing_copy_implementations)] #[derive(Debug)] -pub enum Struct_CUctx_st { } +pub enum Struct_CUctx_st {} pub type CUcontext = *mut Struct_CUctx_st; -pub enum Struct_CUmod_st { } +pub enum Struct_CUmod_st {} pub type CUmodule = *mut Struct_CUmod_st; -pub enum Struct_CUfunc_st { } +pub enum Struct_CUfunc_st {} pub type CUfunction = *mut Struct_CUfunc_st; -pub enum Struct_CUarray_st { } +pub enum Struct_CUarray_st {} pub type CUarray = *mut Struct_CUarray_st; -pub enum Struct_CUmipmappedArray_st { } +pub enum 
Struct_CUmipmappedArray_st {} pub type CUmipmappedArray = *mut Struct_CUmipmappedArray_st; -pub enum Struct_CUtexref_st { } +pub enum Struct_CUtexref_st {} pub type CUtexref = *mut Struct_CUtexref_st; -pub enum Struct_CUsurfref_st { } +pub enum Struct_CUsurfref_st {} pub type CUsurfref = *mut Struct_CUsurfref_st; -pub enum Struct_CUevent_st { } +pub enum Struct_CUevent_st {} pub type CUevent = *mut Struct_CUevent_st; -pub enum Struct_CUstream_st { } +pub enum Struct_CUstream_st {} pub type CUstream = *mut Struct_CUstream_st; -pub enum Struct_CUgraphicsResource_st { } +pub enum Struct_CUgraphicsResource_st {} pub type CUgraphicsResource = *mut Struct_CUgraphicsResource_st; pub type CUtexObject = ::libc::c_ulonglong; pub type CUsurfObject = ::libc::c_ulonglong; @@ -623,10 +730,14 @@ pub struct Struct_CUuuid_st { pub bytes: [::libc::c_char; 16usize], } impl ::std::clone::Clone for Struct_CUuuid_st { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_CUuuid_st { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type CUuuid = Struct_CUuuid_st; #[repr(C)] @@ -635,10 +746,14 @@ pub struct Struct_CUipcEventHandle_st { pub reserved: [::libc::c_char; 64usize], } impl ::std::clone::Clone for Struct_CUipcEventHandle_st { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_CUipcEventHandle_st { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type CUipcEventHandle = Struct_CUipcEventHandle_st; #[repr(C)] @@ -647,10 +762,14 @@ pub struct Struct_CUipcMemHandle_st { pub reserved: [::libc::c_char; 64usize], } impl ::std::clone::Clone for Struct_CUipcMemHandle_st { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_CUipcMemHandle_st { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type CUipcMemHandle = Struct_CUipcMemHandle_st; pub type Enum_CUipcMem_flags_enum = ::libc::c_uint; @@ -815,10 +934,14 @@ pub struct Struct_CUdevprop_st { pub textureAlign: ::libc::c_int, } impl ::std::clone::Clone for Struct_CUdevprop_st { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_CUdevprop_st { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type CUdevprop = Struct_CUdevprop_st; pub type Enum_CUpointer_attribute_enum = ::libc::c_uint; @@ -914,7 +1037,7 @@ pub const CU_JIT_INPUT_OBJECT: ::libc::c_uint = 3; pub const CU_JIT_INPUT_LIBRARY: ::libc::c_uint = 4; pub const CU_JIT_NUM_INPUT_TYPES: ::libc::c_uint = 5; pub type CUjitInputType = Enum_CUjitInputType_enum; -pub enum Struct_CUlinkState_st { } +pub enum Struct_CUlinkState_st {} pub type CUlinkState = *mut Struct_CUlinkState_st; pub type Enum_CUgraphicsRegisterFlags_enum = ::libc::c_uint; pub const CU_GRAPHICS_REGISTER_FLAGS_NONE: ::libc::c_uint = 0; @@ -1012,11 +1135,9 @@ pub enum CUresult { CUDA_ERROR_NOT_SUPPORTED = 801, CUDA_ERROR_UNKNOWN = 999, } -pub type CUstreamCallback = - ::std::option::Option ()>; +pub type CUstreamCallback = ::std::option::Option< + unsafe extern "C" fn(hStream: CUstream, status: CUresult, userData: *mut ::libc::c_void) -> (), +>; pub type CUoccupancyB2DSize = 
::std::option::Option size_t>; #[repr(C)] @@ -1040,10 +1161,14 @@ pub struct Struct_CUDA_MEMCPY2D_st { pub Height: size_t, } impl ::std::clone::Clone for Struct_CUDA_MEMCPY2D_st { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_CUDA_MEMCPY2D_st { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type CUDA_MEMCPY2D = Struct_CUDA_MEMCPY2D_st; #[repr(C)] @@ -1076,10 +1201,14 @@ pub struct Struct_CUDA_MEMCPY3D_st { pub Depth: size_t, } impl ::std::clone::Clone for Struct_CUDA_MEMCPY3D_st { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_CUDA_MEMCPY3D_st { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type CUDA_MEMCPY3D = Struct_CUDA_MEMCPY3D_st; #[repr(C)] @@ -1112,10 +1241,14 @@ pub struct Struct_CUDA_MEMCPY3D_PEER_st { pub Depth: size_t, } impl ::std::clone::Clone for Struct_CUDA_MEMCPY3D_PEER_st { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_CUDA_MEMCPY3D_PEER_st { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type CUDA_MEMCPY3D_PEER = Struct_CUDA_MEMCPY3D_PEER_st; #[repr(C)] @@ -1127,10 +1260,14 @@ pub struct Struct_CUDA_ARRAY_DESCRIPTOR_st { pub NumChannels: ::libc::c_uint, } impl ::std::clone::Clone for Struct_CUDA_ARRAY_DESCRIPTOR_st { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_CUDA_ARRAY_DESCRIPTOR_st { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type CUDA_ARRAY_DESCRIPTOR = Struct_CUDA_ARRAY_DESCRIPTOR_st; #[repr(C)] @@ -1144,10 +1281,14 @@ pub struct Struct_CUDA_ARRAY3D_DESCRIPTOR_st { pub Flags: ::libc::c_uint, } impl ::std::clone::Clone for Struct_CUDA_ARRAY3D_DESCRIPTOR_st { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_CUDA_ARRAY3D_DESCRIPTOR_st { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type CUDA_ARRAY3D_DESCRIPTOR = Struct_CUDA_ARRAY3D_DESCRIPTOR_st; #[repr(C)] @@ -1158,10 +1299,14 @@ pub struct Struct_CUDA_RESOURCE_DESC_st { pub flags: ::libc::c_uint, } impl ::std::clone::Clone for Struct_CUDA_RESOURCE_DESC_st { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_CUDA_RESOURCE_DESC_st { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } #[repr(C)] #[derive(Copy)] @@ -1191,10 +1336,14 @@ impl Union_Unnamed20 { } } impl ::std::clone::Clone for Union_Unnamed20 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Union_Unnamed20 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } #[repr(C)] #[derive(Copy)] @@ -1202,10 +1351,14 @@ pub struct Struct_Unnamed21 { pub hArray: CUarray, } impl ::std::clone::Clone for Struct_Unnamed21 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_Unnamed21 
{ - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } #[repr(C)] #[derive(Copy)] @@ -1213,10 +1366,14 @@ pub struct Struct_Unnamed22 { pub hMipmappedArray: CUmipmappedArray, } impl ::std::clone::Clone for Struct_Unnamed22 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_Unnamed22 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } #[repr(C)] #[derive(Copy)] @@ -1227,10 +1384,14 @@ pub struct Struct_Unnamed23 { pub sizeInBytes: size_t, } impl ::std::clone::Clone for Struct_Unnamed23 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_Unnamed23 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } #[repr(C)] #[derive(Copy)] @@ -1243,10 +1404,14 @@ pub struct Struct_Unnamed24 { pub pitchInBytes: size_t, } impl ::std::clone::Clone for Struct_Unnamed24 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_Unnamed24 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } #[repr(C)] #[derive(Copy)] @@ -1254,10 +1419,14 @@ pub struct Struct_Unnamed25 { pub reserved: [::libc::c_int; 32usize], } impl ::std::clone::Clone for Struct_Unnamed25 { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_Unnamed25 { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type CUDA_RESOURCE_DESC = Struct_CUDA_RESOURCE_DESC_st; #[repr(C)] @@ -1274,10 +1443,14 @@ pub struct Struct_CUDA_TEXTURE_DESC_st { pub reserved: [::libc::c_int; 16usize], } impl ::std::clone::Clone for Struct_CUDA_TEXTURE_DESC_st { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_CUDA_TEXTURE_DESC_st { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type CUDA_TEXTURE_DESC = Struct_CUDA_TEXTURE_DESC_st; pub type Enum_CUresourceViewFormat_enum = ::libc::c_uint; @@ -1331,10 +1504,14 @@ pub struct Struct_CUDA_RESOURCE_VIEW_DESC_st { pub reserved: [::libc::c_uint; 16usize], } impl ::std::clone::Clone for Struct_CUDA_RESOURCE_VIEW_DESC_st { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_CUDA_RESOURCE_VIEW_DESC_st { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type CUDA_RESOURCE_VIEW_DESC = Struct_CUDA_RESOURCE_VIEW_DESC_st; #[repr(C)] @@ -1344,10 +1521,14 @@ pub struct Struct_CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st { pub vaSpaceToken: ::libc::c_uint, } impl ::std::clone::Clone for Struct_CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st { - fn clone(&self) -> Self { *self } + fn clone(&self) -> Self { + *self + } } impl ::std::default::Default for Struct_CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st { - fn default() -> Self { unsafe { ::std::mem::zeroed() } } + fn default() -> Self { + unsafe { ::std::mem::zeroed() } + } } pub type CUDA_POINTER_ATTRIBUTE_P2P_TOKENS = Struct_CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st; @@ -1356,7 +1537,7 @@ extern "C" { // CUDA 
ERROR HANDLING pub fn cuGetErrorString(error: CUresult, pStr: *mut *const ::libc::c_char) -> CUresult; - pub fn cuGetErrorName(error: CUresult, pStr: *mut *const ::libc::c_char)-> CUresult; + pub fn cuGetErrorName(error: CUresult, pStr: *mut *const ::libc::c_char) -> CUresult; // CUDA INITIALIZATION pub fn cuInit(Flags: ::libc::c_uint) -> CUresult; @@ -1372,7 +1553,7 @@ extern "C" { pub fn cuDeviceGetName( name: *mut ::libc::c_char, len: ::libc::c_int, - dev: CUdevice + dev: CUdevice, ) -> CUresult; pub fn cuDeviceTotalMem_v2(bytes: *mut size_t, dev: CUdevice) -> CUresult; @@ -1380,7 +1561,7 @@ extern "C" { pub fn cuDeviceGetAttribute( pi: *mut ::libc::c_int, attrib: CUdevice_attribute, - dev: CUdevice + dev: CUdevice, ) -> CUresult; // CUDA PRIMARY CONTEXT MANAGEMENT @@ -1391,17 +1572,13 @@ extern "C" { pub fn cuDevicePrimaryCtxGetState( dev: CUdevice, flags: *mut ::libc::c_uint, - active: *mut ::libc::c_int + active: *mut ::libc::c_int, ) -> CUresult; pub fn cuDevicePrimaryCtxReset(dev: CUdevice) -> CUresult; // CUDA CONTEXT MANAGEMENT - pub fn cuCtxCreate_v2( - pctx: *mut CUcontext, - flags: ::libc::c_uint, - dev: CUdevice - ) -> CUresult; + pub fn cuCtxCreate_v2(pctx: *mut CUcontext, flags: ::libc::c_uint, dev: CUdevice) -> CUresult; pub fn cuCtxDestroy_v2(ctx: CUcontext) -> CUresult; @@ -1435,32 +1612,25 @@ extern "C" { pub fn cuCtxGetStreamPriorityRange( leastPriority: *mut ::libc::c_int, - greatestPriority: *mut ::libc::c_int + greatestPriority: *mut ::libc::c_int, ) -> CUresult; - // CUDA MODULE MANAGEMENT - pub fn cuModuleLoad( - module: *mut CUmodule, - fname: *const ::libc::c_char - ) -> CUresult; + pub fn cuModuleLoad(module: *mut CUmodule, fname: *const ::libc::c_char) -> CUresult; - pub fn cuModuleLoadData( - module: *mut CUmodule, - image: *const ::libc::c_void - ) -> CUresult; + pub fn cuModuleLoadData(module: *mut CUmodule, image: *const ::libc::c_void) -> CUresult; pub fn cuModuleLoadDataEx( module: *mut CUmodule, image: *const ::libc::c_void, numOptions: ::libc::c_uint, options: *mut CUjit_option, - optionValues: *mut *mut ::libc::c_void + optionValues: *mut *mut ::libc::c_void, ) -> CUresult; pub fn cuModuleLoadFatBinary( module: *mut CUmodule, - fatCubin: *const ::libc::c_void + fatCubin: *const ::libc::c_void, ) -> CUresult; pub fn cuModuleUnload(hmod: CUmodule) -> CUresult; @@ -1468,33 +1638,33 @@ extern "C" { pub fn cuModuleGetFunction( hfunc: *mut CUfunction, hmod: CUmodule, - name: *const ::libc::c_char + name: *const ::libc::c_char, ) -> CUresult; pub fn cuModuleGetGlobal_v2( dptr: *mut CUdeviceptr, bytes: *mut size_t, hmod: CUmodule, - name: *const ::libc::c_char + name: *const ::libc::c_char, ) -> CUresult; pub fn cuModuleGetTexRef( pTexRef: *mut CUtexref, hmod: CUmodule, - name: *const ::libc::c_char + name: *const ::libc::c_char, ) -> CUresult; pub fn cuModuleGetSurfRef( pSurfRef: *mut CUsurfref, hmod: CUmodule, - name: *const ::libc::c_char + name: *const ::libc::c_char, ) -> CUresult; pub fn cuLinkCreate_v2( numOptions: ::libc::c_uint, options: *mut CUjit_option, optionValues: *mut *mut ::libc::c_void, - stateOut: *mut CUlinkState + stateOut: *mut CUlinkState, ) -> CUresult; pub fn cuLinkAddData_v2( @@ -1505,7 +1675,7 @@ extern "C" { name: *const ::libc::c_char, numOptions: ::libc::c_uint, options: *mut CUjit_option, - optionValues: *mut *mut ::libc::c_void + optionValues: *mut *mut ::libc::c_void, ) -> CUresult; pub fn cuLinkAddFile_v2( @@ -1514,13 +1684,13 @@ extern "C" { path: *const ::libc::c_char, numOptions: ::libc::c_uint, options: *mut CUjit_option, 
- optionValues: *mut *mut ::libc::c_void + optionValues: *mut *mut ::libc::c_void, ) -> CUresult; pub fn cuLinkComplete( state: CUlinkState, cubinOut: *mut *mut ::libc::c_void, - sizeOut: *mut size_t + sizeOut: *mut size_t, ) -> CUresult; pub fn cuLinkDestroy(state: CUlinkState) -> CUresult; @@ -1535,7 +1705,7 @@ extern "C" { pPitch: *mut size_t, WidthInBytes: size_t, Height: size_t, - ElementSizeBytes: ::libc::c_uint + ElementSizeBytes: ::libc::c_uint, ) -> CUresult; pub fn cuMemFree_v2(dptr: CUdeviceptr) -> CUresult; @@ -1543,7 +1713,7 @@ extern "C" { pub fn cuMemGetAddressRange_v2( pbase: *mut CUdeviceptr, psize: *mut size_t, - dptr: CUdeviceptr + dptr: CUdeviceptr, ) -> CUresult; pub fn cuMemAllocHost_v2(pp: *mut *mut ::libc::c_void, bytesize: size_t) -> CUresult; @@ -1553,53 +1723,41 @@ extern "C" { pub fn cuMemHostAlloc( pp: *mut *mut ::libc::c_void, bytesize: size_t, - Flags: ::libc::c_uint + Flags: ::libc::c_uint, ) -> CUresult; pub fn cuMemHostGetDevicePointer_v2( pdptr: *mut CUdeviceptr, p: *mut ::libc::c_void, - Flags: ::libc::c_uint + Flags: ::libc::c_uint, ) -> CUresult; - pub fn cuMemHostGetFlags( - pFlags: *mut ::libc::c_uint, - p: *mut ::libc::c_void - ) -> CUresult; + pub fn cuMemHostGetFlags(pFlags: *mut ::libc::c_uint, p: *mut ::libc::c_void) -> CUresult; pub fn cuMemAllocManaged( dptr: *mut CUdeviceptr, bytesize: size_t, - flags: ::libc::c_uint + flags: ::libc::c_uint, ) -> CUresult; - pub fn cuDeviceGetByPCIBusId( - dev: *mut CUdevice, - pciBusId: *const ::libc::c_char - ) -> CUresult; + pub fn cuDeviceGetByPCIBusId(dev: *mut CUdevice, pciBusId: *const ::libc::c_char) -> CUresult; pub fn cuDeviceGetPCIBusId( pciBusId: *mut ::libc::c_char, len: ::libc::c_int, - dev: CUdevice + dev: CUdevice, ) -> CUresult; - pub fn cuIpcGetEventHandle( - pHandle: *mut CUipcEventHandle, - event: CUevent - ) -> CUresult; + pub fn cuIpcGetEventHandle(pHandle: *mut CUipcEventHandle, event: CUevent) -> CUresult; - pub fn cuIpcOpenEventHandle( - phEvent: *mut CUevent, - handle: CUipcEventHandle - ) -> CUresult; + pub fn cuIpcOpenEventHandle(phEvent: *mut CUevent, handle: CUipcEventHandle) -> CUresult; pub fn cuIpcGetMemHandle(pHandle: *mut CUipcMemHandle, dptr: CUdeviceptr) -> CUresult; pub fn cuIpcOpenMemHandle( pdptr: *mut CUdeviceptr, handle: CUipcMemHandle, - Flags: ::libc::c_uint + Flags: ::libc::c_uint, ) -> CUresult; pub fn cuIpcCloseMemHandle(dptr: CUdeviceptr) -> CUresult; @@ -1607,7 +1765,7 @@ extern "C" { pub fn cuMemHostRegister_v2( p: *mut ::libc::c_void, bytesize: size_t, - Flags: ::libc::c_uint + Flags: ::libc::c_uint, ) -> CUresult; pub fn cuMemHostUnregister(p: *mut ::libc::c_void) -> CUresult; @@ -1619,52 +1777,53 @@ extern "C" { dstContext: CUcontext, srcDevice: CUdeviceptr, srcContext: CUcontext, - ByteCount: size_t + ByteCount: size_t, ) -> CUresult; pub fn cuMemcpyHtoD_v2( dstDevice: CUdeviceptr, srcHost: *const ::libc::c_void, - ByteCount: size_t + ByteCount: size_t, ) -> CUresult; pub fn cuMemcpyDtoH_v2( dstHost: *mut ::libc::c_void, srcDevice: CUdeviceptr, - ByteCount: size_t + ByteCount: size_t, ) -> CUresult; pub fn cuMemcpyDtoD_v2( dstDevice: CUdeviceptr, srcDevice: CUdeviceptr, - ByteCount: size_t + ByteCount: size_t, ) -> CUresult; pub fn cuMemcpyDtoA_v2( dstArray: CUarray, dstOffset: size_t, srcDevice: CUdeviceptr, - ByteCount: size_t + ByteCount: size_t, ) -> CUresult; pub fn cuMemcpyAtoD_v2( dstDevice: CUdeviceptr, srcArray: CUarray, - srcOffset: size_t, ByteCount: size_t + srcOffset: size_t, + ByteCount: size_t, ) -> CUresult; pub fn cuMemcpyHtoA_v2( 
dstArray: CUarray, dstOffset: size_t, srcHost: *const ::libc::c_void, - ByteCount: size_t + ByteCount: size_t, ) -> CUresult; pub fn cuMemcpyAtoH_v2( dstHost: *mut ::libc::c_void, srcArray: CUarray, srcOffset: size_t, - ByteCount: size_t + ByteCount: size_t, ) -> CUresult; pub fn cuMemcpyAtoA_v2( @@ -1672,7 +1831,7 @@ extern "C" { dstOffset: size_t, srcArray: CUarray, srcOffset: size_t, - ByteCount: size_t + ByteCount: size_t, ) -> CUresult; pub fn cuMemcpy2D_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult; @@ -1687,7 +1846,7 @@ extern "C" { dst: CUdeviceptr, src: CUdeviceptr, ByteCount: size_t, - hStream: CUstream + hStream: CUstream, ) -> CUresult; pub fn cuMemcpyPeerAsync( @@ -1696,28 +1855,28 @@ extern "C" { srcDevice: CUdeviceptr, srcContext: CUcontext, ByteCount: size_t, - hStream: CUstream + hStream: CUstream, ) -> CUresult; pub fn cuMemcpyHtoDAsync_v2( dstDevice: CUdeviceptr, srcHost: *const ::libc::c_void, ByteCount: size_t, - hStream: CUstream + hStream: CUstream, ) -> CUresult; pub fn cuMemcpyDtoHAsync_v2( dstHost: *mut ::libc::c_void, srcDevice: CUdeviceptr, ByteCount: size_t, - hStream: CUstream + hStream: CUstream, ) -> CUresult; pub fn cuMemcpyDtoDAsync_v2( dstDevice: CUdeviceptr, srcDevice: CUdeviceptr, ByteCount: size_t, - hStream: CUstream + hStream: CUstream, ) -> CUresult; pub fn cuMemcpyHtoAAsync_v2( @@ -1725,7 +1884,7 @@ extern "C" { dstOffset: size_t, srcHost: *const ::libc::c_void, ByteCount: size_t, - hStream: CUstream + hStream: CUstream, ) -> CUresult; pub fn cuMemcpyAtoHAsync_v2( @@ -1733,44 +1892,26 @@ extern "C" { srcArray: CUarray, srcOffset: size_t, ByteCount: size_t, - hStream: CUstream + hStream: CUstream, ) -> CUresult; pub fn cuMemcpy2DAsync_v2(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult; - pub fn cuMemcpy3DAsync_v2( - pCopy: *const CUDA_MEMCPY3D, - hStream: CUstream - ) -> CUresult; + pub fn cuMemcpy3DAsync_v2(pCopy: *const CUDA_MEMCPY3D, hStream: CUstream) -> CUresult; - pub fn cuMemcpy3DPeerAsync( - pCopy: *const CUDA_MEMCPY3D_PEER, - hStream: CUstream - ) -> CUresult; + pub fn cuMemcpy3DPeerAsync(pCopy: *const CUDA_MEMCPY3D_PEER, hStream: CUstream) -> CUresult; - pub fn cuMemsetD8_v2( - dstDevice: CUdeviceptr, - uc: ::libc::c_uchar, - N: size_t - ) -> CUresult; + pub fn cuMemsetD8_v2(dstDevice: CUdeviceptr, uc: ::libc::c_uchar, N: size_t) -> CUresult; - pub fn cuMemsetD16_v2( - dstDevice: CUdeviceptr, - us: ::libc::c_ushort, - N: size_t - ) -> CUresult; + pub fn cuMemsetD16_v2(dstDevice: CUdeviceptr, us: ::libc::c_ushort, N: size_t) -> CUresult; - pub fn cuMemsetD32_v2( - dstDevice: CUdeviceptr, - ui: ::libc::c_uint, - N: size_t - ) -> CUresult; + pub fn cuMemsetD32_v2(dstDevice: CUdeviceptr, ui: ::libc::c_uint, N: size_t) -> CUresult; pub fn cuMemsetD2D8_v2( dstDevice: CUdeviceptr, dstPitch: size_t, uc: ::libc::c_uchar, Width: size_t, - Height: size_t + Height: size_t, ) -> CUresult; pub fn cuMemsetD2D16_v2( @@ -1778,7 +1919,7 @@ extern "C" { dstPitch: size_t, us: ::libc::c_ushort, Width: size_t, - Height: size_t + Height: size_t, ) -> CUresult; pub fn cuMemsetD2D32_v2( @@ -1786,28 +1927,28 @@ extern "C" { dstPitch: size_t, ui: ::libc::c_uint, Width: size_t, - Height: size_t + Height: size_t, ) -> CUresult; pub fn cuMemsetD8Async( dstDevice: CUdeviceptr, uc: ::libc::c_uchar, N: size_t, - hStream: CUstream + hStream: CUstream, ) -> CUresult; pub fn cuMemsetD16Async( dstDevice: CUdeviceptr, us: ::libc::c_ushort, N: size_t, - hStream: CUstream + hStream: CUstream, ) -> CUresult; pub fn cuMemsetD32Async( dstDevice: CUdeviceptr, ui: 
::libc::c_uint, N: size_t, - hStream: CUstream + hStream: CUstream, ) -> CUresult; pub fn cuMemsetD2D8Async( @@ -1816,7 +1957,7 @@ extern "C" { uc: ::libc::c_uchar, Width: size_t, Height: size_t, - hStream: CUstream + hStream: CUstream, ) -> CUresult; pub fn cuMemsetD2D16Async( @@ -1825,7 +1966,7 @@ extern "C" { us: ::libc::c_ushort, Width: size_t, Height: size_t, - hStream: CUstream + hStream: CUstream, ) -> CUresult; pub fn cuMemsetD2D32Async( @@ -1834,41 +1975,41 @@ extern "C" { ui: ::libc::c_uint, Width: size_t, Height: size_t, - hStream: CUstream + hStream: CUstream, ) -> CUresult; pub fn cuArrayCreate_v2( pHandle: *mut CUarray, - pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR + pAllocateArray: *const CUDA_ARRAY_DESCRIPTOR, ) -> CUresult; pub fn cuArrayGetDescriptor_v2( pArrayDescriptor: *mut CUDA_ARRAY_DESCRIPTOR, - hArray: CUarray + hArray: CUarray, ) -> CUresult; pub fn cuArrayDestroy(hArray: CUarray) -> CUresult; pub fn cuArray3DCreate_v2( pHandle: *mut CUarray, - pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR + pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR, ) -> CUresult; pub fn cuArray3DGetDescriptor_v2( pArrayDescriptor: *mut CUDA_ARRAY3D_DESCRIPTOR, - hArray: CUarray + hArray: CUarray, ) -> CUresult; pub fn cuMipmappedArrayCreate( pHandle: *mut CUmipmappedArray, pMipmappedArrayDesc: *const CUDA_ARRAY3D_DESCRIPTOR, - numMipmapLevels: ::libc::c_uint + numMipmapLevels: ::libc::c_uint, ) -> CUresult; pub fn cuMipmappedArrayGetLevel( pLevelArray: *mut CUarray, hMipmappedArray: CUmipmappedArray, - level: ::libc::c_uint + level: ::libc::c_uint, ) -> CUresult; pub fn cuMipmappedArrayDestroy(hMipmappedArray: CUmipmappedArray) -> CUresult; @@ -1876,61 +2017,49 @@ extern "C" { pub fn cuPointerGetAttribute( data: *mut ::libc::c_void, attribute: CUpointer_attribute, - ptr: CUdeviceptr + ptr: CUdeviceptr, ) -> CUresult; pub fn cuPointerSetAttribute( value: *const ::libc::c_void, attribute: CUpointer_attribute, - ptr: CUdeviceptr + ptr: CUdeviceptr, ) -> CUresult; pub fn cuPointerGetAttributes( numAttributes: ::libc::c_uint, attributes: *mut CUpointer_attribute, data: *mut *mut ::libc::c_void, - ptr: CUdeviceptr + ptr: CUdeviceptr, ) -> CUresult; - pub fn cuStreamCreate( - phStream: *mut CUstream, - Flags: ::libc::c_uint - ) -> CUresult; + pub fn cuStreamCreate(phStream: *mut CUstream, Flags: ::libc::c_uint) -> CUresult; pub fn cuStreamCreateWithPriority( phStream: *mut CUstream, flags: ::libc::c_uint, - priority: ::libc::c_int + priority: ::libc::c_int, ) -> CUresult; - pub fn cuStreamGetPriority( - hStream: CUstream, - priority: *mut ::libc::c_int - ) -> CUresult; + pub fn cuStreamGetPriority(hStream: CUstream, priority: *mut ::libc::c_int) -> CUresult; - pub fn cuStreamGetFlags( - hStream: CUstream, - flags: *mut ::libc::c_uint - ) -> CUresult; + pub fn cuStreamGetFlags(hStream: CUstream, flags: *mut ::libc::c_uint) -> CUresult; - pub fn cuStreamWaitEvent( - hStream: CUstream, - hEvent: CUevent, - Flags: ::libc::c_uint - ) -> CUresult; + pub fn cuStreamWaitEvent(hStream: CUstream, hEvent: CUevent, Flags: ::libc::c_uint) + -> CUresult; pub fn cuStreamAddCallback( hStream: CUstream, callback: CUstreamCallback, userData: *mut ::libc::c_void, - flags: ::libc::c_uint + flags: ::libc::c_uint, ) -> CUresult; pub fn cuStreamAttachMemAsync( hStream: CUstream, dptr: CUdeviceptr, length: size_t, - flags: ::libc::c_uint + flags: ::libc::c_uint, ) -> CUresult; pub fn cuStreamQuery(hStream: CUstream) -> CUresult; @@ -1939,10 +2068,7 @@ extern "C" { pub fn cuStreamDestroy_v2(hStream: CUstream) -> 
CUresult; - pub fn cuEventCreate( - phEvent: *mut CUevent, - Flags: ::libc::c_uint - ) -> CUresult; + pub fn cuEventCreate(phEvent: *mut CUevent, Flags: ::libc::c_uint) -> CUresult; pub fn cuEventRecord(hEvent: CUevent, hStream: CUstream) -> CUresult; @@ -1955,13 +2081,13 @@ extern "C" { pub fn cuEventElapsedTime( pMilliseconds: *mut ::libc::c_float, hStart: CUevent, - hEnd: CUevent + hEnd: CUevent, ) -> CUresult; pub fn cuFuncGetAttribute( pi: *mut ::libc::c_int, attrib: CUfunction_attribute, - hfunc: CUfunction + hfunc: CUfunction, ) -> CUresult; pub fn cuFuncSetCacheConfig(hfunc: CUfunction, config: CUfunc_cache) -> CUresult; @@ -1975,16 +2101,17 @@ extern "C" { blockDimX: ::libc::c_uint, blockDimY: ::libc::c_uint, blockDimZ: ::libc::c_uint, - sharedMemBytes: ::libc::c_uint, hStream: CUstream, + sharedMemBytes: ::libc::c_uint, + hStream: CUstream, kernelParams: *mut *mut ::libc::c_void, - extra: *mut *mut ::libc::c_void + extra: *mut *mut ::libc::c_void, ) -> CUresult; pub fn cuOccupancyMaxActiveBlocksPerMultiprocessor( numBlocks: *mut ::libc::c_int, func: CUfunction, blockSize: ::libc::c_int, - dynamicSMemSize: size_t + dynamicSMemSize: size_t, ) -> CUresult; pub fn cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( @@ -1992,7 +2119,7 @@ extern "C" { func: CUfunction, blockSize: ::libc::c_int, dynamicSMemSize: size_t, - flags: ::libc::c_uint + flags: ::libc::c_uint, ) -> CUresult; pub fn cuOccupancyMaxPotentialBlockSize( @@ -2001,7 +2128,7 @@ extern "C" { func: CUfunction, blockSizeToDynamicSMemSize: CUoccupancyB2DSize, dynamicSMemSize: size_t, - blockSizeLimit: ::libc::c_int + blockSizeLimit: ::libc::c_int, ) -> CUresult; pub fn cuOccupancyMaxPotentialBlockSizeWithFlags( @@ -2011,58 +2138,53 @@ extern "C" { blockSizeToDynamicSMemSize: CUoccupancyB2DSize, dynamicSMemSize: size_t, blockSizeLimit: ::libc::c_int, - flags: ::libc::c_uint + flags: ::libc::c_uint, ) -> CUresult; - pub fn cuTexRefSetArray( - hTexRef: CUtexref, - hArray: CUarray, - Flags: ::libc::c_uint - ) -> CUresult; + pub fn cuTexRefSetArray(hTexRef: CUtexref, hArray: CUarray, Flags: ::libc::c_uint) -> CUresult; pub fn cuTexRefSetMipmappedArray( hTexRef: CUtexref, hMipmappedArray: CUmipmappedArray, - Flags: ::libc::c_uint + Flags: ::libc::c_uint, ) -> CUresult; pub fn cuTexRefSetAddress_v2( ByteOffset: *mut size_t, hTexRef: CUtexref, - dptr: CUdeviceptr, bytes: size_t + dptr: CUdeviceptr, + bytes: size_t, ) -> CUresult; pub fn cuTexRefSetAddress2D_v3( hTexRef: CUtexref, desc: *const CUDA_ARRAY_DESCRIPTOR, - dptr: CUdeviceptr, Pitch: size_t + dptr: CUdeviceptr, + Pitch: size_t, ) -> CUresult; pub fn cuTexRefSetFormat( hTexRef: CUtexref, fmt: CUarray_format, - NumPackedComponents: ::libc::c_int + NumPackedComponents: ::libc::c_int, ) -> CUresult; pub fn cuTexRefSetAddressMode( hTexRef: CUtexref, dim: ::libc::c_int, - am: CUaddress_mode + am: CUaddress_mode, ) -> CUresult; pub fn cuTexRefSetFilterMode(hTexRef: CUtexref, fm: CUfilter_mode) -> CUresult; pub fn cuTexRefSetMipmapFilterMode(hTexRef: CUtexref, fm: CUfilter_mode) -> CUresult; - pub fn cuTexRefSetMipmapLevelBias( - hTexRef: CUtexref, - bias: ::libc::c_float - ) -> CUresult; + pub fn cuTexRefSetMipmapLevelBias(hTexRef: CUtexref, bias: ::libc::c_float) -> CUresult; pub fn cuTexRefSetMipmapLevelClamp( hTexRef: CUtexref, minMipmapLevelClamp: ::libc::c_float, - maxMipmapLevelClamp: ::libc::c_float + maxMipmapLevelClamp: ::libc::c_float, ) -> CUresult; pub fn cuTexRefSetMaxAnisotropy(hTexRef: CUtexref, maxAniso: ::libc::c_uint) -> CUresult; @@ -2075,13 +2197,13 @@ 
extern "C" { pub fn cuTexRefGetMipmappedArray( phMipmappedArray: *mut CUmipmappedArray, - hTexRef: CUtexref + hTexRef: CUtexref, ) -> CUresult; pub fn cuTexRefGetAddressMode( pam: *mut CUaddress_mode, hTexRef: CUtexref, - dim: ::libc::c_int + dim: ::libc::c_int, ) -> CUresult; pub fn cuTexRefGetFilterMode(pfm: *mut CUfilter_mode, hTexRef: CUtexref) -> CUresult; @@ -2089,87 +2211,74 @@ extern "C" { pub fn cuTexRefGetFormat( pFormat: *mut CUarray_format, pNumChannels: *mut ::libc::c_int, - hTexRef: CUtexref + hTexRef: CUtexref, ) -> CUresult; - pub fn cuTexRefGetMipmapFilterMode( - pfm: *mut CUfilter_mode, - hTexRef: CUtexref - ) -> CUresult; + pub fn cuTexRefGetMipmapFilterMode(pfm: *mut CUfilter_mode, hTexRef: CUtexref) -> CUresult; - pub fn cuTexRefGetMipmapLevelBias( - pbias: *mut ::libc::c_float, - hTexRef: CUtexref - ) -> CUresult; + pub fn cuTexRefGetMipmapLevelBias(pbias: *mut ::libc::c_float, hTexRef: CUtexref) -> CUresult; pub fn cuTexRefGetMipmapLevelClamp( pminMipmapLevelClamp: *mut ::libc::c_float, pmaxMipmapLevelClamp: *mut ::libc::c_float, - hTexRef: CUtexref + hTexRef: CUtexref, ) -> CUresult; - pub fn cuTexRefGetMaxAnisotropy( - pmaxAniso: *mut ::libc::c_int, - hTexRef: CUtexref - ) -> CUresult; + pub fn cuTexRefGetMaxAnisotropy(pmaxAniso: *mut ::libc::c_int, hTexRef: CUtexref) -> CUresult; pub fn cuTexRefGetFlags(pFlags: *mut ::libc::c_uint, hTexRef: CUtexref) -> CUresult; pub fn cuSurfRefSetArray( - hSurfRef: CUsurfref, hArray: CUarray, - Flags: ::libc::c_uint + hSurfRef: CUsurfref, + hArray: CUarray, + Flags: ::libc::c_uint, ) -> CUresult; - pub fn cuSurfRefGetArray( - phArray: *mut CUarray, - hSurfRef: CUsurfref - ) -> CUresult; + pub fn cuSurfRefGetArray(phArray: *mut CUarray, hSurfRef: CUsurfref) -> CUresult; pub fn cuTexObjectCreate( pTexObject: *mut CUtexObject, pResDesc: *const CUDA_RESOURCE_DESC, pTexDesc: *const CUDA_TEXTURE_DESC, - pResViewDesc: *const CUDA_RESOURCE_VIEW_DESC + pResViewDesc: *const CUDA_RESOURCE_VIEW_DESC, ) -> CUresult; pub fn cuTexObjectDestroy(texObject: CUtexObject) -> CUresult; pub fn cuTexObjectGetResourceDesc( pResDesc: *mut CUDA_RESOURCE_DESC, - texObject: CUtexObject + texObject: CUtexObject, ) -> CUresult; pub fn cuTexObjectGetTextureDesc( pTexDesc: *mut CUDA_TEXTURE_DESC, - texObject: CUtexObject + texObject: CUtexObject, ) -> CUresult; pub fn cuTexObjectGetResourceViewDesc( pResViewDesc: *mut CUDA_RESOURCE_VIEW_DESC, - texObject: CUtexObject + texObject: CUtexObject, ) -> CUresult; pub fn cuSurfObjectCreate( pSurfObject: *mut CUsurfObject, - pResDesc: *const CUDA_RESOURCE_DESC + pResDesc: *const CUDA_RESOURCE_DESC, ) -> CUresult; pub fn cuSurfObjectDestroy(surfObject: CUsurfObject) -> CUresult; pub fn cuSurfObjectGetResourceDesc( pResDesc: *mut CUDA_RESOURCE_DESC, - surfObject: CUsurfObject + surfObject: CUsurfObject, ) -> CUresult; pub fn cuDeviceCanAccessPeer( canAccessPeer: *mut ::libc::c_int, - dev: CUdevice, peerDev: CUdevice + dev: CUdevice, + peerDev: CUdevice, ) -> CUresult; - pub fn cuCtxEnablePeerAccess( - peerContext: CUcontext, - Flags: ::libc::c_uint - ) -> CUresult; + pub fn cuCtxEnablePeerAccess(peerContext: CUcontext, Flags: ::libc::c_uint) -> CUresult; pub fn cuCtxDisablePeerAccess(peerContext: CUcontext) -> CUresult; @@ -2179,39 +2288,39 @@ extern "C" { pArray: *mut CUarray, resource: CUgraphicsResource, arrayIndex: ::libc::c_uint, - mipLevel: ::libc::c_uint + mipLevel: ::libc::c_uint, ) -> CUresult; pub fn cuGraphicsResourceGetMappedMipmappedArray( pMipmappedArray: *mut CUmipmappedArray, - resource: 
CUgraphicsResource
+        resource: CUgraphicsResource,
     ) -> CUresult;

     pub fn cuGraphicsResourceGetMappedPointer_v2(
         pDevPtr: *mut CUdeviceptr,
         pSize: *mut size_t,
-        resource: CUgraphicsResource
+        resource: CUgraphicsResource,
     ) -> CUresult;

     pub fn cuGraphicsResourceSetMapFlags_v2(
         resource: CUgraphicsResource,
-        flags: ::libc::c_uint
+        flags: ::libc::c_uint,
     ) -> CUresult;

     pub fn cuGraphicsMapResources(
         count: ::libc::c_uint,
         resources: *mut CUgraphicsResource,
-        hStream: CUstream
+        hStream: CUstream,
     ) -> CUresult;

     pub fn cuGraphicsUnmapResources(
         count: ::libc::c_uint,
         resources: *mut CUgraphicsResource,
-        hStream: CUstream
+        hStream: CUstream,
     ) -> CUresult;

     pub fn cuGetExportTable(
         ppExportTable: *mut *const ::libc::c_void,
-        pExportTableId: *const CUuuid
+        pExportTableId: *const CUuuid,
     ) -> CUresult;
 }
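Aside (illustration only, not part of the patch series): every fallback arm in these wrappers now threads the raw CUresult into Error::Unknown instead of discarding it. A minimal sketch of what that buys a caller, assuming the two-field Unknown(&'static str, u64) variant and the DriverError re-export shown in this series:

    use coaster::frameworks::cuda::api::DriverError;

    // Hypothetical helper: log a driver error, keeping the numeric CUresult
    // when the driver returned a code without a dedicated Error variant.
    fn log_driver_error(err: &DriverError) {
        match err {
            DriverError::Unknown(msg, code) => {
                eprintln!("{} (raw CUresult: {})", msg, code)
            }
            // Display is derived via thiserror for the remaining variants.
            other => eprintln!("{}", other),
        }
    }
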
diff --git a/coaster/src/frameworks/cuda/api/driver/memory.rs b/coaster/src/frameworks/cuda/api/driver/memory.rs
index 690f34c98..ffd4bbe59 100644
--- a/coaster/src/frameworks/cuda/api/driver/memory.rs
+++ b/coaster/src/frameworks/cuda/api/driver/memory.rs
@@ -1,9 +1,9 @@
 //! Provides the Cuda API with its memory/buffer functionality.

-use super::{API, Error};
-use crate::frameworks::native::flatbox::FlatBox;
-use crate::frameworks::cuda::Memory;
 use super::ffi::*;
+use super::{Error, API};
+use crate::frameworks::cuda::Memory;
+use crate::frameworks::native::flatbox::FlatBox;

 impl API {
     /// Allocates memory on the Cuda device.
@@ -12,46 +12,81 @@ impl API {
     /// aligned for any kind of variable. The memory is not cleared.
     /// Returns a memory id for the created buffer, which can now be written to.
     pub fn mem_alloc(bytesize: size_t) -> Result<Memory, Error> {
-        Ok(Memory::from_c(unsafe {API::ffi_mem_alloc(bytesize)}?))
+        Ok(Memory::from_c(unsafe { API::ffi_mem_alloc(bytesize) }?))
     }

     /// Releases allocated memory from the Cuda device.
     pub fn mem_free(memory: CUdeviceptr) -> Result<(), Error> {
-        unsafe {API::ffi_mem_free(memory)}
+        unsafe { API::ffi_mem_free(memory) }
     }

     /// Copies memory from the Host to the Cuda device.
     pub fn mem_cpy_h_to_d(host_mem: &FlatBox, device_mem: &mut Memory) -> Result<(), Error> {
-        unsafe {API::ffi_mem_cpy_h_to_d(*device_mem.id_c(), host_mem.as_slice().as_ptr(), host_mem.byte_size())}
+        unsafe {
+            API::ffi_mem_cpy_h_to_d(
+                *device_mem.id_c(),
+                host_mem.as_slice().as_ptr(),
+                host_mem.byte_size(),
+            )
+        }
     }

     /// Copies memory from the Cuda device to the Host.
     pub fn mem_cpy_d_to_h(device_mem: &Memory, host_mem: &mut FlatBox) -> Result<(), Error> {
-        unsafe {API::ffi_mem_cpy_d_to_h(host_mem.as_mut_slice().as_mut_ptr(), *device_mem.id_c(), host_mem.byte_size())}
+        unsafe {
+            API::ffi_mem_cpy_d_to_h(
+                host_mem.as_mut_slice().as_mut_ptr(),
+                *device_mem.id_c(),
+                host_mem.byte_size(),
+            )
+        }
     }

     unsafe fn ffi_mem_alloc(bytesize: size_t) -> Result<CUdeviceptr, Error> {
         let mut memory_id: CUdeviceptr = 0;
         match cuMemAlloc_v2(&mut memory_id, bytesize) {
             CUresult::CUDA_SUCCESS => Ok(memory_id),
-            CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")),
-            CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
-            CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
-            CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
-            CUresult::CUDA_ERROR_OUT_OF_MEMORY => Err(Error::OutOfMemory("Device is out of memory.")),
-            status => Err(Error::Unknown("Unable to allocate memory.", status as i32 as u64)),
+            CUresult::CUDA_ERROR_DEINITIALIZED => {
+                Err(Error::Deinitialized("CUDA got deinitialized."))
+            }
+            CUresult::CUDA_ERROR_NOT_INITIALIZED => {
+                Err(Error::NotInitialized("CUDA is not initialized."))
+            }
+            CUresult::CUDA_ERROR_INVALID_CONTEXT => {
+                Err(Error::InvalidContext("No valid context available."))
+            }
+            CUresult::CUDA_ERROR_INVALID_VALUE => {
+                Err(Error::InvalidValue("Invalid value provided."))
+            }
+            CUresult::CUDA_ERROR_OUT_OF_MEMORY => {
+                Err(Error::OutOfMemory("Device is out of memory."))
+            }
+            status => Err(Error::Unknown(
+                "Unable to allocate memory.",
+                status as i32 as u64,
+            )),
         }
     }

     unsafe fn ffi_mem_free(dptr: CUdeviceptr) -> Result<(), Error> {
         match cuMemFree_v2(dptr) {
             CUresult::CUDA_SUCCESS => Ok(()),
-            CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")),
-            CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
-            CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
-            CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
-            status => Err(Error::Unknown("Unable to free memory.", status as i32 as u64)),
-
+            CUresult::CUDA_ERROR_DEINITIALIZED => {
+                Err(Error::Deinitialized("CUDA got deinitialized."))
+            }
+            CUresult::CUDA_ERROR_NOT_INITIALIZED => {
+                Err(Error::NotInitialized("CUDA is not initialized."))
+            }
+            CUresult::CUDA_ERROR_INVALID_CONTEXT => {
+                Err(Error::InvalidContext("No valid context available."))
+            }
+            CUresult::CUDA_ERROR_INVALID_VALUE => {
+                Err(Error::InvalidValue("Invalid value provided."))
+            }
+            status => Err(Error::Unknown(
+                "Unable to free memory.",
+                status as i32 as u64,
+            )),
         }
     }

@@ -62,12 +97,22 @@ impl API {
     ) -> Result<(), Error> {
         match cuMemcpyHtoD_v2(dst_device, src_host, byte_count) {
             CUresult::CUDA_SUCCESS => Ok(()),
-            CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")),
-            CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
-            CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
-            CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
-            status => Err(Error::Unknown("Unable to copy memory from host to device.", status as i32 as u64)),
-
+            CUresult::CUDA_ERROR_DEINITIALIZED => {
+                Err(Error::Deinitialized("CUDA got deinitialized."))
+            }
+            CUresult::CUDA_ERROR_NOT_INITIALIZED => {
+                Err(Error::NotInitialized("CUDA is not initialized."))
+            }
+            CUresult::CUDA_ERROR_INVALID_CONTEXT => {
+                Err(Error::InvalidContext("No valid context available."))
+            }
+            CUresult::CUDA_ERROR_INVALID_VALUE => {
+                Err(Error::InvalidValue("Invalid value provided."))
+            }
+            status => Err(Error::Unknown(
+                "Unable to copy memory from host to device.",
+                status as i32 as u64,
+            )),
         }
     }

@@ -79,11 +124,22 @@ impl API {
     ) -> Result<(), Error> {
         let status = cuMemcpyDtoH_v2(dst_host, src_device, byte_count);
         match status {
             CUresult::CUDA_SUCCESS => Ok(()),
-            CUresult::CUDA_ERROR_DEINITIALIZED => Err(Error::Deinitialized("CUDA got deinitialized.")),
-            CUresult::CUDA_ERROR_NOT_INITIALIZED => Err(Error::NotInitialized("CUDA is not initialized.")),
-            CUresult::CUDA_ERROR_INVALID_CONTEXT => Err(Error::InvalidContext("No valid context available.")),
-            CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
-            status => Err(Error::Unknown("Unable to copy memory from device to host.", status as i32 as u64)),
+            CUresult::CUDA_ERROR_DEINITIALIZED => {
+                Err(Error::Deinitialized("CUDA got deinitialized."))
+            }
+            CUresult::CUDA_ERROR_NOT_INITIALIZED => {
+                Err(Error::NotInitialized("CUDA is not initialized."))
+            }
+            CUresult::CUDA_ERROR_INVALID_CONTEXT => {
+                Err(Error::InvalidContext("No valid context available."))
+            }
+            CUresult::CUDA_ERROR_INVALID_VALUE => {
+                Err(Error::InvalidValue("Invalid value provided."))
+            }
+            status => Err(Error::Unknown(
+                "Unable to copy memory from device to host.",
+                status as i32 as u64,
+            )),
         }
     }
 }
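Aside (illustration only, not part of the patch): the safe wrappers above compose into a full host-to-device round trip. A sketch, assuming an initialized driver with a current context, and that FlatBox implements From<Vec<u8>> as in coaster's native framework:

    use coaster::frameworks::cuda::api::{Driver, DriverError};
    use coaster::frameworks::native::flatbox::FlatBox;

    fn roundtrip() -> Result<(), DriverError> {
        let src = FlatBox::from(vec![1u8, 2, 3, 4]);
        let mut dst = FlatBox::from(vec![0u8; 4]);

        // Allocate a device buffer of matching size, copy in, copy out, free.
        let mut device_buf = Driver::mem_alloc(src.byte_size())?;
        Driver::mem_cpy_h_to_d(&src, &mut device_buf)?;
        Driver::mem_cpy_d_to_h(&device_buf, &mut dst)?;
        Driver::mem_free(*device_buf.id_c())?;

        assert_eq!(src.as_slice(), dst.as_slice());
        Ok(())
    }
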
diff --git a/coaster/src/frameworks/cuda/api/driver/mod.rs b/coaster/src/frameworks/cuda/api/driver/mod.rs
index 7792890df..ba9cf76db 100644
--- a/coaster/src/frameworks/cuda/api/driver/mod.rs
+++ b/coaster/src/frameworks/cuda/api/driver/mod.rs
@@ -6,9 +6,9 @@ pub use self::error::Error;
 /// Defines the Cuda API.
 pub struct API;

-mod error;
 mod context;
 mod device;
-mod memory;
+mod error;
 pub mod ffi;
+mod memory;
 mod utils;
diff --git a/coaster/src/frameworks/cuda/api/driver/utils.rs b/coaster/src/frameworks/cuda/api/driver/utils.rs
index 5f539d5b2..9b4fb09f8 100644
--- a/coaster/src/frameworks/cuda/api/driver/utils.rs
+++ b/coaster/src/frameworks/cuda/api/driver/utils.rs
@@ -1,7 +1,7 @@
 //! Provides Cuda Driver API utility functionality.

-use super::{API, Error};
 use super::ffi::*;
+use super::{Error, API};

 impl API {
     /// Initialize the Cuda Driver API.
@@ -12,14 +12,20 @@ impl API {
     }

     unsafe fn ffi_init() -> Result<(), Error> {
-        const FLAGS : u32 = 0u32;
+        const FLAGS: u32 = 0u32;
         match cuInit(FLAGS) {
             CUresult::CUDA_SUCCESS => Ok(()),
-            CUresult::CUDA_ERROR_INVALID_VALUE => Err(Error::InvalidValue("Invalid value provided.")),
+            CUresult::CUDA_ERROR_INVALID_VALUE => {
+                Err(Error::InvalidValue("Invalid value provided."))
+            }
             CUresult::CUDA_ERROR_INVALID_DEVICE => Err(Error::InvalidDevice("Invalid device.")),
-            CUresult::CUDA_ERROR_NO_DEVICE => Err(Error::NoDevice("Unable to find a CUDA device. Try run `nvidia-smi` on your console.")),
-            status => Err(Error::Unknown("Unable to initialze the Cuda Driver API.", status as i32 as u64)),
-
+            CUresult::CUDA_ERROR_NO_DEVICE => Err(Error::NoDevice(
+                "Unable to find a CUDA device. Try running `nvidia-smi` in your console.",
+            )),
+            status => Err(Error::Unknown(
+                "Unable to initialize the Cuda Driver API.",
+                status as i32 as u64,
+            )),
         }
     }
 }
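Aside (illustration only, not part of the patch): with NoDevice and Unknown split apart, callers can degrade gracefully on machines without a GPU. A sketch, assuming API::init() is the public wrapper around ffi_init() shown above:

    use coaster::frameworks::cuda::api::{Driver, DriverError};

    // Returns Ok(true) when CUDA is usable, Ok(false) when no device exists.
    fn init_cuda_or_skip() -> Result<bool, DriverError> {
        match Driver::init() {
            Ok(()) => Ok(true),
            // A missing GPU is an expected situation (e.g. CI), not a hard error.
            Err(DriverError::NoDevice(_)) => Ok(false),
            // Anything else still carries its raw CUresult for diagnostics.
            Err(other) => Err(other),
        }
    }
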
Try running `nvidia-smi` on your console.", + )), + status => Err(Error::Unknown( + "Unable to initialize the Cuda Driver API.", + status as i32 as u64, + )), } } } diff --git a/coaster/src/frameworks/cuda/api/mod.rs index 8c7e8b327..cfc44d78f 100644 --- a/coaster/src/frameworks/cuda/api/mod.rs +++ b/coaster/src/frameworks/cuda/api/mod.rs @@ -4,8 +4,8 @@ //! * CUDA Driver API //! * CUDA cuDNN API -pub use self::driver::API as Driver; pub use self::driver::ffi as DriverFFI; pub use self::driver::Error as DriverError; +pub use self::driver::API as Driver; pub mod driver; diff --git a/coaster/src/frameworks/cuda/context.rs index 344db8bfc..db309dfe5 100644 --- a/coaster/src/frameworks/cuda/context.rs +++ b/coaster/src/frameworks/cuda/context.rs @@ -1,12 +1,12 @@ //! Provides a Rust wrapper around Cuda's context. -use crate::device::{IDevice, MemorySync}; -use crate::device::Error as DeviceError; use super::api::DriverFFI; -use super::{Driver, DriverError, Device}; use super::memory::*; -use crate::frameworks::native::flatbox::FlatBox; +use super::{Device, Driver, DriverError}; +use crate::device::Error as DeviceError; +use crate::device::{IDevice, MemorySync}; use crate::frameworks::native::device::Cpu; +use crate::frameworks::native::flatbox::FlatBox; use std::any::Any; use std::hash::{Hash, Hasher}; use std::rc::Rc; @@ -31,19 +31,17 @@ impl Drop for Context { impl Context { /// Initializes a new Cuda context. pub fn new(devices: Device) -> Result { - Ok( - Context::from_c( - Driver::create_context(devices.clone())?, - vec!(devices) - ) - ) + Ok(Context::from_c( + Driver::create_context(devices.clone())?, + vec![devices], + )) } /// Initializes a new Cuda platform from its C type. pub fn from_c(id: DriverFFI::CUcontext, devices: Vec) -> Context { Context { id: Rc::new(id as isize), - devices + devices, } } @@ -89,8 +87,12 @@ impl IDevice for Context { } impl MemorySync for Context { - fn sync_in(&self, my_memory: &mut dyn Any, src_device: &dyn Any, src_memory: &dyn Any) - -> Result<(), DeviceError> { + fn sync_in( + &self, + my_memory: &mut dyn Any, + src_device: &dyn Any, + src_memory: &dyn Any, + ) -> Result<(), DeviceError> { if src_device.downcast_ref::().is_some() { let my_mem = my_memory.downcast_mut::().unwrap(); let src_mem = src_memory.downcast_ref::().unwrap(); @@ -101,8 +103,12 @@ impl MemorySync for Context { } } - fn sync_out(&self, my_memory: &dyn Any, dst_device: &dyn Any, dst_memory: &mut dyn Any) - -> Result<(), DeviceError> { + fn sync_out( + &self, + my_memory: &dyn Any, + dst_device: &dyn Any, + dst_memory: &mut dyn Any, + ) -> Result<(), DeviceError> { if dst_device.downcast_ref::().is_some() { let my_mem = my_memory.downcast_ref::().unwrap(); let dst_mem = dst_memory.downcast_mut::().unwrap(); diff --git a/coaster/src/frameworks/cuda/device.rs index 90bc29fb3..9ecd3aad9 100644 --- a/coaster/src/frameworks/cuda/device.rs +++ b/coaster/src/frameworks/cuda/device.rs @@ -1,9 +1,9 @@ //! Provides a Rust wrapper around Cuda's device. -use crate::hardware::{IHardware, HardwareType}; use super::api::{Driver, DriverFFI}; -use std::io::Cursor; +use crate::hardware::{HardwareType, IHardware}; use byteorder::{LittleEndian, ReadBytesExt}; +use std::io::Cursor; #[derive(Debug, Clone)] /// Defines a Cuda Device. @@ -30,12 +30,18 @@ impl Default for Device { impl Device { /// Initializes a new Cuda device.
pub fn from_isize(id: isize) -> Device { - Device { id, ..Device::default() } + Device { + id, + ..Device::default() + } } /// Initializes a new Cuda device from its C type. pub fn from_c(id: DriverFFI::CUdevice) -> Device { - Device { id: id as isize, ..Device::default() } + Device { + id: id as isize, + ..Device::default() + } } /// Returns the id as its C type. @@ -45,10 +51,11 @@ impl Device { /// Loads the name of the device via a foreign Cuda call. pub fn load_name(&mut self) -> Self { - self.name = match Driver::load_device_info(self, DriverFFI::CUdevice_attribute::CU_DEVICE_NAME) { - Ok(result) => Some(result.to_string()), - Err(_) => None - }; + self.name = + match Driver::load_device_info(self, DriverFFI::CUdevice_attribute::CU_DEVICE_NAME) { + Ok(result) => Some(result.to_string()), + Err(_) => None, + }; self.clone() } @@ -60,9 +67,12 @@ impl Device { /// Loads the compute units of the device via a foreign Cuda call. pub fn load_compute_units(&mut self) -> Self { - self.compute_units = match Driver::load_device_info(self, DriverFFI::CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT) { + self.compute_units = match Driver::load_device_info( + self, + DriverFFI::CUdevice_attribute::CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, + ) { Ok(result) => Some(result.to_isize()), - Err(_) => None + Err(_) => None, }; self.clone() } diff --git a/coaster/src/frameworks/cuda/memory.rs b/coaster/src/frameworks/cuda/memory.rs index 807248e54..56e5744d5 100644 --- a/coaster/src/frameworks/cuda/memory.rs +++ b/coaster/src/frameworks/cuda/memory.rs @@ -1,9 +1,9 @@ //! Provides a Rust wrapper around Cuda's memory. -use super::api::{Driver, DriverFFI, DriverError}; +use super::api::{Driver, DriverError, DriverFFI}; use crate::device::IMemory; -use std::{ptr, fmt}; +use std::{fmt, ptr}; /// Defines a Cuda Memory. pub struct Memory { diff --git a/coaster/src/frameworks/cuda/mod.rs b/coaster/src/frameworks/cuda/mod.rs index e19fb506e..94702bc00 100644 --- a/coaster/src/frameworks/cuda/mod.rs +++ b/coaster/src/frameworks/cuda/mod.rs @@ -7,27 +7,26 @@ //! Cuda device -> Hardware //! 
Cuda context -> Device -extern {} +extern "C" {} -use crate::backend::{Backend, IBackend}; -use crate::framework::IFramework; -pub use self::memory::Memory; +pub use self::api::{Driver, DriverError}; pub use self::context::Context; +pub use self::device::{Device, DeviceInfo}; pub use self::function::Function; +pub use self::memory::Memory; pub use self::module::Module; -pub use self::device::{Device, DeviceInfo}; -pub use self::api::{Driver, DriverError}; -use crate::cudnn::*; +use crate::backend::{Backend, IBackend}; use crate::cublas; +use crate::cudnn::*; +use crate::framework::IFramework; use crate::BackendConfig; -pub mod device; +mod api; pub mod context; +pub mod device; pub mod function; pub mod memory; pub mod module; -mod api; - /// Initialise the CUDA, CUBLAS, and CUDNN APIs /// @@ -65,7 +64,9 @@ impl Cuda { pub fn initialise_cublas(&mut self) -> Result<(), crate::framework::Error> { self.cublas = { let mut context = cublas::Context::new().unwrap(); - context.set_pointer_mode(cublas::api::PointerMode::Device).unwrap(); + context + .set_pointer_mode(cublas::api::PointerMode::Device) + .unwrap(); Some(context) }; Ok(()) @@ -75,7 +76,7 @@ impl Cuda { pub fn initialise_cudnn(&mut self) -> Result<(), crate::framework::Error> { self.cudnn = match Cudnn::new() { Ok(cudnn_ptr) => Some(cudnn_ptr), - Err(_) => None + Err(_) => None, }; Ok(()) } @@ -84,7 +85,7 @@ impl Cuda { pub fn cudnn(&self) -> &Cudnn { match &self.cudnn { Some(cudnn) => cudnn, - None => panic!("Couldn't find a CUDNN Handle - Initialise CUDNN has not been called") + None => panic!("Couldn't find a CUDNN Handle - Initialise CUDNN has not been called"), } } @@ -92,7 +93,7 @@ impl Cuda { pub fn cublas(&self) -> &cublas::Context { match &self.cublas { Some(cublas) => cublas, - None => panic!("Couldn't find a CUBLAS Handle - Initialise CUBLAS has not been called") + None => panic!("Couldn't find a CUBLAS Handle - Initialise CUBLAS has not been called"), } } } @@ -102,7 +103,9 @@ impl IFramework for Cuda { type D = Context; type B = Module; - fn ID() -> &'static str { "CUDA" } + fn ID() -> &'static str { + "CUDA" + } fn new() -> Cuda { // Init function must be called before any other function from the Cuda Driver API can be @@ -111,15 +114,13 @@ impl IFramework for Cuda { panic!("Unable to initialize Cuda Framework: {}", err); } match Cuda::load_hardwares() { - Ok(hardwares) => { - Cuda { - hardwares, - binary: Module::from_isize(1), - cudnn: None, - cublas: None, - } + Ok(hardwares) => Cuda { + hardwares, + binary: Module::from_isize(1), + cudnn: None, + cublas: None, }, - Err(err) => panic!("Could not initialize Cuda Framework, due to: {}", err) + Err(err) => panic!("Could not initialize Cuda Framework, due to: {}", err), } } diff --git a/coaster/src/frameworks/cuda/module.rs b/coaster/src/frameworks/cuda/module.rs index 614220f74..04469627f 100644 --- a/coaster/src/frameworks/cuda/module.rs +++ b/coaster/src/frameworks/cuda/module.rs @@ -14,9 +14,7 @@ pub struct Module { impl Module { /// Initializes a new Cuda Module. pub fn from_isize(id: isize) -> Module { - Module { - id, - } + Module { id } } // /// Initializes a new Cuda Module from its C type. diff --git a/coaster/src/frameworks/mod.rs b/coaster/src/frameworks/mod.rs index 8486e9c33..b85f0c5ac 100644 --- a/coaster/src/frameworks/mod.rs +++ b/coaster/src/frameworks/mod.rs @@ -1,13 +1,13 @@ //! Exposes the specific Framework implementations. 
+#[cfg(feature = "cuda")] +pub use self::cuda::Cuda; pub use self::native::Native; #[cfg(feature = "opencl")] pub use self::opencl::OpenCL; -#[cfg(feature = "cuda")] -pub use self::cuda::Cuda; +#[cfg(feature = "cuda")] +pub mod cuda; pub mod native; #[cfg(feature = "opencl")] pub mod opencl; -#[cfg(feature = "cuda")] -pub mod cuda; diff --git a/coaster/src/frameworks/native/binary.rs b/coaster/src/frameworks/native/binary.rs index 3ebe6aeb4..442d12efd 100644 --- a/coaster/src/frameworks/native/binary.rs +++ b/coaster/src/frameworks/native/binary.rs @@ -11,9 +11,7 @@ pub struct Binary { impl Binary { /// Initializes the native CPU binary. pub fn new() -> Binary { - Binary { - id: 0, - } + Binary { id: 0 } } } diff --git a/coaster/src/frameworks/native/device.rs b/coaster/src/frameworks/native/device.rs index 4ddc549a0..581417faa 100644 --- a/coaster/src/frameworks/native/device.rs +++ b/coaster/src/frameworks/native/device.rs @@ -2,11 +2,11 @@ use std::any::Any; use std::hash::{Hash, Hasher}; -use crate::device::{IDevice, MemorySync}; -use crate::device::Error as DeviceError; -use super::hardware::Hardware; -use super::flatbox::FlatBox; use super::allocate_boxed_slice; +use super::flatbox::FlatBox; +use super::hardware::Hardware; +use crate::device::Error as DeviceError; +use crate::device::{IDevice, MemorySync}; #[derive(Debug, Clone)] /// Defines the host CPU Hardware. @@ -14,7 +14,7 @@ use super::allocate_boxed_slice; /// Can later be transformed into a [Coaster hardware][hardware]. /// [hardware]: ../../hardware/index.html pub struct Cpu { - hardwares: Vec + hardwares: Vec, } impl Cpu { @@ -45,24 +45,36 @@ impl IDevice for Cpu { impl MemorySync for Cpu { // transfers from/to Cuda and OpenCL are defined on their MemorySync traits - fn sync_in(&self, my_memory: &mut dyn Any, src_device: &dyn Any, src_memory: &dyn Any) - -> Result<(), DeviceError> { + fn sync_in( + &self, + my_memory: &mut dyn Any, + src_device: &dyn Any, + src_memory: &dyn Any, + ) -> Result<(), DeviceError> { if src_device.downcast_ref::().is_some() { let my_mem = my_memory.downcast_mut::().unwrap(); let src_mem = src_memory.downcast_ref::().unwrap(); - my_mem.as_mut_slice::().clone_from_slice(src_mem.as_slice::()); + my_mem + .as_mut_slice::() + .clone_from_slice(src_mem.as_slice::()); return Ok(()); } Err(DeviceError::NoMemorySyncRoute) } - fn sync_out(&self, my_memory: &dyn Any, dst_device: &dyn Any, dst_memory: &mut dyn Any) - -> Result<(), DeviceError> { + fn sync_out( + &self, + my_memory: &dyn Any, + dst_device: &dyn Any, + dst_memory: &mut dyn Any, + ) -> Result<(), DeviceError> { if dst_device.downcast_ref::().is_some() { let my_mem = my_memory.downcast_ref::().unwrap(); let dst_mem = dst_memory.downcast_mut::().unwrap(); - dst_mem.as_mut_slice::().clone_from_slice(my_mem.as_slice::()); + dst_mem + .as_mut_slice::() + .clone_from_slice(my_mem.as_slice::()); return Ok(()); } diff --git a/coaster/src/frameworks/native/error.rs b/coaster/src/frameworks/native/error.rs index 22931d18c..aae3e213d 100644 --- a/coaster/src/frameworks/native/error.rs +++ b/coaster/src/frameworks/native/error.rs @@ -1,6 +1,5 @@ /// Defines a generic set of Native Errors. - -use std::{fmt, error}; +use std::{error, fmt}; #[derive(Debug, Copy, Clone, PartialEq, Eq)] /// Defines the Native Error. 
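The hunks above consistently replace the old catch-all `_` arm with a bound `status`, so the raw CUresult travels inside `Error::Unknown` instead of being discarded. The reworked `error.rs` itself is not part of this excerpt; the following is a minimal sketch, assuming a thiserror-derived enum of the shape these call sites imply (the variant set is taken from the call sites, the display strings are illustrative):

    use thiserror::Error;

    /// Sketch of the driver error type implied by the call sites above.
    #[derive(Debug, Error)]
    pub enum Error {
        #[error("{0}")]
        Deinitialized(&'static str),
        #[error("{0}")]
        NotInitialized(&'static str),
        #[error("{0}")]
        InvalidContext(&'static str),
        #[error("{0}")]
        InvalidValue(&'static str),
        #[error("{0}")]
        InvalidDevice(&'static str),
        #[error("{0}")]
        NoDevice(&'static str),
        #[error("{0}")]
        OutOfMemory(&'static str),
        /// Per the commit subject, `Unknown` must carry the error code, so
        /// the original CUresult is never lost in the fallback arm.
        #[error("{0} (status code {1})")]
        Unknown(&'static str, u64),
    }

The `status as i32 as u64` double cast seen at the call sites goes through the enum's `i32` representation first and then widens to `u64` to match the variant's code field.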
diff --git a/coaster/src/frameworks/native/flatbox.rs index f23a5ecf2..4616fb2b2 100644 --- a/coaster/src/frameworks/native/flatbox.rs +++ b/coaster/src/frameworks/native/flatbox.rs @@ -8,7 +8,7 @@ use std::slice; /// A Box without any knowledge of its underlying type. pub struct FlatBox { len: usize, - raw_box: *mut [u8] + raw_box: *mut [u8], } impl FlatBox { @@ -16,7 +16,7 @@ pub fn from_box(b: Box<[u8]>) -> FlatBox { FlatBox { len: b.len(), - raw_box: Box::into_raw(b) + raw_box: Box::into_raw(b), } } @@ -24,24 +24,14 @@ /// Access memory as slice. /// /// The preferred way to access native memory. pub fn as_slice(&self) -> &[T] { - unsafe { - slice::from_raw_parts_mut( - self.raw_box as *mut T, - self.len / mem::size_of::() - ) - } + unsafe { slice::from_raw_parts_mut(self.raw_box as *mut T, self.len / mem::size_of::()) } } /// Access memory as mutable slice. /// /// The preferred way to access native memory. pub fn as_mut_slice(&mut self) -> &mut [T] { - unsafe { - slice::from_raw_parts_mut( - self.raw_box as *mut T, - self.len / mem::size_of::() - ) - } + unsafe { slice::from_raw_parts_mut(self.raw_box as *mut T, self.len / mem::size_of::()) } } /// Returns memory size of the FlatBox. diff --git a/coaster/src/frameworks/native/hardware.rs index a3020cd7a..f7f5381ff 100644 --- a/coaster/src/frameworks/native/hardware.rs +++ b/coaster/src/frameworks/native/hardware.rs @@ -1,6 +1,6 @@ //! Provides a hardware aka. the host CPU. -use crate::hardware::{IHardware, HardwareType}; +use crate::hardware::{HardwareType, IHardware}; #[derive(Debug, Clone)] /// Defines the host CPU Hardware. @@ -28,7 +28,10 @@ impl Default for Hardware { impl Hardware { /// Initializes a new OpenCL hardware. pub fn new(id: isize) -> Hardware { - Hardware { id, ..Hardware::default() } + Hardware { + id, + ..Hardware::default() + } } } diff --git a/coaster/src/frameworks/native/mod.rs index e2595497a..ad59b0d46 100644 --- a/coaster/src/frameworks/native/mod.rs +++ b/coaster/src/frameworks/native/mod.rs @@ -4,31 +4,30 @@ //! //! -use crate::framework::IFramework; -use crate::backend::{Backend, IBackend}; -use crate::hardware::{HardwareType, IHardware}; -use self::hardware::Hardware; -pub use self::device::Cpu; -pub use self::function::Function; pub use self::binary::Binary; +pub use self::device::Cpu; pub use self::error::Error; +pub use self::function::Function; +use self::hardware::Hardware; #[cfg(not(feature = "unstable_alloc"))] pub use self::stable_alloc::allocate_boxed_slice; #[cfg(feature = "unstable_alloc")] pub use self::unstable_alloc::allocate_boxed_slice; +use crate::backend::{Backend, IBackend}; +use crate::framework::IFramework; +use crate::hardware::{HardwareType, IHardware}; +pub mod binary; pub mod device; +mod error; pub mod flatbox; -pub mod hardware; pub mod function; -pub mod binary; -mod error; +pub mod hardware; #[cfg(not(feature = "unstable_alloc"))] mod stable_alloc; #[cfg(feature = "unstable_alloc")] mod unstable_alloc; - /// Initialise the Native Backend for running Tensor Operations pub fn get_native_backend() -> Backend { Backend::::default().unwrap() } @@ -55,7 +54,9 @@ impl IFramework for Native { type D = Cpu; type B = Binary; - fn ID() -> &'static str { "NATIVE" } + fn ID() -> &'static str { + "NATIVE" + } fn new() -> Native { let hardwares = Native::load_hardwares().expect("Native hardwares are always ok.
qed"); @@ -71,7 +72,7 @@ impl IFramework for Native { .set_hardware_type(Some(HardwareType::CPU)) .set_compute_units(Some(1)) .build(); - Ok(vec!(cpu)) + Ok(vec![cpu]) } fn hardwares(&self) -> &[Hardware] { diff --git a/coaster/src/frameworks/native/unstable_alloc.rs b/coaster/src/frameworks/native/unstable_alloc.rs index 8c425fba6..9aad20b8e 100644 --- a/coaster/src/frameworks/native/unstable_alloc.rs +++ b/coaster/src/frameworks/native/unstable_alloc.rs @@ -3,7 +3,5 @@ use alloc::raw_vec::RawVec; /// Alternative way to allocate memory, requiring unstable RawVec. pub fn allocate_boxed_slice(cap: usize) -> Box<[u8]> { let raw = RawVec::with_capacity(cap); - unsafe { - raw.into_box() - } + unsafe { raw.into_box() } } diff --git a/coaster/src/frameworks/opencl/api/context.rs b/coaster/src/frameworks/opencl/api/context.rs index 9d8b7418a..cc492b8d7 100644 --- a/coaster/src/frameworks/opencl/api/context.rs +++ b/coaster/src/frameworks/opencl/api/context.rs @@ -2,13 +2,13 @@ //! //! At Coaster device can be understood as a synonym to OpenCL's context. -use libc; -use frameworks::opencl::{API, Device, Error, Platform}; -use frameworks::opencl::context::{ContextInfo,ContextInfoQuery,ContextProperties}; -use super::types as cl; use super::ffi::*; -use std::ptr; +use super::types as cl; +use frameworks::opencl::context::{ContextInfo, ContextInfoQuery, ContextProperties}; +use frameworks::opencl::{Device, Error, Platform, API}; +use libc; use std::mem::size_of; +use std::ptr; impl API { /// Creates a OpenCL context. @@ -20,13 +20,24 @@ impl API { pub fn create_context( devices: Vec, properties: *const cl::context_properties, - callback: extern fn (*const libc::c_char, *const libc::c_void, libc::size_t, *mut libc::c_void), - user_data: *mut libc::c_void + callback: extern "C" fn( + *const libc::c_char, + *const libc::c_void, + libc::size_t, + *mut libc::c_void, + ), + user_data: *mut libc::c_void, ) -> Result { let device_ids: Vec = devices.iter().map(|device| device.id_c()).collect(); - Ok( - unsafe { API::ffi_create_context(properties, device_ids.len() as u32, device_ids.as_ptr(), callback, user_data) }? - ) + Ok(unsafe { + API::ffi_create_context( + properties, + device_ids.len() as u32, + device_ids.as_ptr(), + callback, + user_data, + ) + }?) } /// FFI Creates an OpenCL context. @@ -34,11 +45,23 @@ impl API { properties: *const cl::context_properties, num_devices: cl::uint, devices: *const cl::device_id, - pfn_notify: extern fn (*const libc::c_char, *const libc::c_void, libc::size_t, *mut libc::c_void), - user_data: *mut libc::c_void + pfn_notify: extern "C" fn( + *const libc::c_char, + *const libc::c_void, + libc::size_t, + *mut libc::c_void, + ), + user_data: *mut libc::c_void, ) -> Result { let mut errcode: i32 = 0; - let context_id = clCreateContext(properties, num_devices, devices, pfn_notify, user_data, &mut errcode); + let context_id = clCreateContext( + properties, + num_devices, + devices, + pfn_notify, + user_data, + &mut errcode, + ); match errcode { errcode if errcode == cl::Status::SUCCESS as i32 => Ok(context_id), errcode if errcode == cl::Status::INVALID_PLATFORM as i32 => Err(Error::InvalidPlatform("properties is NULL and no platform could be selected or if platform value specified in propertiesis not a valid platform.")), @@ -55,10 +78,9 @@ impl API { /// Gets info about one of the available properties of an OpenCL context. 
pub fn get_context_info( context: cl::context_id, - query: ContextInfoQuery , + query: ContextInfoQuery, ) -> Result { - - let info_name : cl::context_info = match query { + let info_name: cl::context_info = match query { ContextInfoQuery::ReferenceCount => cl::CL_CONTEXT_REFERENCE_COUNT, ContextInfoQuery::NumDevices => cl::CL_CONTEXT_NUM_DEVICES, ContextInfoQuery::Properties => cl::CL_CONTEXT_PROPERTIES, @@ -69,106 +91,113 @@ impl API { unsafe { let mut zero: usize = 0; let info_size: *mut usize = &mut zero; - API::ffi_get_context_info_size(context, info_name, info_size) - .and_then(|_| { - let mut buffer = vec![0u8; *info_size]; - let info_ptr: *mut libc::c_void = buffer.as_mut_ptr() as *mut libc::c_void; - API::ffi_get_context_info(context, - info_name, - *info_size, - info_ptr) - .and_then(|_| { - match info_name { - cl::CL_CONTEXT_REFERENCE_COUNT => { - let reference_count : u32 = *(info_ptr as *mut u32); - Ok(ContextInfo::ReferenceCount(reference_count)) - }, - cl::CL_CONTEXT_DEVICES => { - let len = *info_size / size_of::(); - let mut dev_ids : Vec = Vec::new(); - let info_ptr : *mut cl::device_id = info_ptr as *mut cl::device_id; - for i in 0..len as isize { - dev_ids.push(*info_ptr.offset(i)); + API::ffi_get_context_info_size(context, info_name, info_size).and_then(|_| { + let mut buffer = vec![0u8; *info_size]; + let info_ptr: *mut libc::c_void = buffer.as_mut_ptr() as *mut libc::c_void; + API::ffi_get_context_info(context, info_name, *info_size, info_ptr).and_then( + |_| { + match info_name { + cl::CL_CONTEXT_REFERENCE_COUNT => { + let reference_count: u32 = *(info_ptr as *mut u32); + Ok(ContextInfo::ReferenceCount(reference_count)) } - Ok(ContextInfo::Devices( - dev_ids - .iter() - .map(|&id| Device::from_isize(id as isize)) - .collect() - )) - }, - cl::CL_CONTEXT_NUM_DEVICES => { - let device_count : u32 = *(info_ptr as *mut u32); - Ok(ContextInfo::NumDevices(device_count)) - }, - cl::CL_CONTEXT_PROPERTIES => { - let mut v : Vec = Vec::new(); - let mut ptr : *mut u8 = info_ptr as *mut u8; - let mut total_decoded: isize = 0; - let info_size = *info_size as isize; - while total_decoded < info_size { - // get the identifier and advance by identifier size count bytes - let identifier : *mut cl::context_properties = ptr as *mut cl::context_properties; - let identifier = *identifier; - ptr = ptr.offset(size_of::() as isize); - // depending on the identifier decode the per identifier payload/argument with the - // corresponding type - match identifier { - cl::CL_CONTEXT_PLATFORM => { - let platform_id : *const cl::platform_id = info_ptr as *const cl::platform_id; - let platform_id = *platform_id; - let size = size_of::() as isize; - total_decoded += size; - ptr = ptr.offset(size); - v.push(ContextProperties::Platform(Platform::from_c(platform_id))); - }, - cl::CL_CONTEXT_INTEROP_USER_SYNC => { - let interop_user_sync : *const cl::boolean = info_ptr as *const cl::boolean; - let interop_user_sync = *interop_user_sync == 0; - let size = size_of::() as isize; - total_decoded += size; - ptr = ptr.offset(size); - v.push(ContextProperties::InteropUserSync(interop_user_sync)); - }, - 0 => { - break; - } - _ => { - return Err(Error::Other("Unknown property")); - } - }; + cl::CL_CONTEXT_DEVICES => { + let len = *info_size / size_of::(); + let mut dev_ids: Vec = Vec::new(); + let info_ptr: *mut cl::device_id = + info_ptr as *mut cl::device_id; + for i in 0..len as isize { + dev_ids.push(*info_ptr.offset(i)); + } + Ok(ContextInfo::Devices( + dev_ids + .iter() + .map(|&id| Device::from_isize(id 
as isize)) + .collect(), + )) } - Ok(ContextInfo::Properties(v)) - } - _ => { - Err(Error::Other("Unknown property")) + cl::CL_CONTEXT_NUM_DEVICES => { + let device_count: u32 = *(info_ptr as *mut u32); + Ok(ContextInfo::NumDevices(device_count)) + } + cl::CL_CONTEXT_PROPERTIES => { + let mut v: Vec = Vec::new(); + let mut ptr: *mut u8 = info_ptr as *mut u8; + let mut total_decoded: isize = 0; + let info_size = *info_size as isize; + while total_decoded < info_size { + // get the identifier and advance by identifier size count bytes + let identifier: *mut cl::context_properties = + ptr as *mut cl::context_properties; + let identifier = *identifier; + ptr = ptr + .offset(size_of::() as isize); + // depending on the identifier decode the per identifier payload/argument with the + // corresponding type + match identifier { + cl::CL_CONTEXT_PLATFORM => { + let platform_id: *const cl::platform_id = + info_ptr as *const cl::platform_id; + let platform_id = *platform_id; + let size = size_of::() as isize; + total_decoded += size; + ptr = ptr.offset(size); + v.push(ContextProperties::Platform( + Platform::from_c(platform_id), + )); + } + cl::CL_CONTEXT_INTEROP_USER_SYNC => { + let interop_user_sync: *const cl::boolean = + info_ptr as *const cl::boolean; + let interop_user_sync = *interop_user_sync == 0; + let size = size_of::() as isize; + total_decoded += size; + ptr = ptr.offset(size); + v.push(ContextProperties::InteropUserSync( + interop_user_sync, + )); + } + 0 => { + break; + } + _ => { + return Err(Error::Other("Unknown property")); + } + }; + } + Ok(ContextInfo::Properties(v)) + } + _ => Err(Error::Other("Unknown property")), } - } - }) - }) + }, + ) + }) } - }?) } - + // This function calls clGetContextInfo with the return data pointer set to // NULL to find out the needed memory allocation first. 
unsafe fn ffi_get_context_info_size( context: cl::context_id, param_name: cl::context_info, - param_value_size_ret: *mut libc::size_t + param_value_size_ret: *mut libc::size_t, ) -> Result<(), Error> { - match clGetContextInfo(context, - param_name, - 0, - ptr::null_mut(), - param_value_size_ret) { + match clGetContextInfo( + context, + param_name, + 0, + ptr::null_mut(), + param_value_size_ret, + ) { cl::Status::SUCCESS => Ok(()), cl::Status::INVALID_CONTEXT => Err(Error::InvalidContext("Invalid context")), cl::Status::INVALID_VALUE => Err(Error::InvalidValue("Invalid value")), cl::Status::OUT_OF_RESOURCES => Err(Error::OutOfResources("Out of resources")), cl::Status::OUT_OF_HOST_MEMORY => Err(Error::OutOfHostMemory("Out of host memory")), - _ => Err(Error::Other("Could not determine needed memory to allocate context info.")) + _ => Err(Error::Other( + "Could not determine needed memory to allocate context info.", + )), } } @@ -179,18 +208,23 @@ impl API { context: cl::context_id, param_name: cl::context_info, param_value_size: libc::size_t, - param_value: *mut libc::c_void) -> Result<(), Error> { - match clGetContextInfo(context, - param_name, - param_value_size, - param_value, - ptr::null_mut()) { + param_value: *mut libc::c_void, + ) -> Result<(), Error> { + match clGetContextInfo( + context, + param_name, + param_value_size, + param_value, + ptr::null_mut(), + ) { cl::Status::SUCCESS => Ok(()), cl::Status::INVALID_CONTEXT => Err(Error::InvalidContext("Invalid context")), cl::Status::INVALID_VALUE => Err(Error::InvalidValue("Invalid value")), cl::Status::OUT_OF_RESOURCES => Err(Error::OutOfResources("Out of resources")), cl::Status::OUT_OF_HOST_MEMORY => Err(Error::OutOfHostMemory("Out of host memory")), - _ => Err(Error::Other("Could not determine needed memory to allocate context info.")) + _ => Err(Error::Other( + "Could not determine needed memory to allocate context info.", + )), } } } diff --git a/coaster/src/frameworks/opencl/api/device.rs b/coaster/src/frameworks/opencl/api/device.rs index 3f13c33ba..6988e1b9f 100644 --- a/coaster/src/frameworks/opencl/api/device.rs +++ b/coaster/src/frameworks/opencl/api/device.rs @@ -2,13 +2,13 @@ //! //! At Coaster hardware can be understood as a synonym to OpenCL's device. -use libc; -use frameworks::opencl::{API, Error}; -use frameworks::opencl::{Platform, Device, DeviceInfo}; -use super::types as cl; use super::ffi::*; -use std::ptr; +use super::types as cl; +use frameworks::opencl::{Device, DeviceInfo, Platform}; +use frameworks::opencl::{Error, API}; +use libc; use std::iter::repeat; +use std::ptr; impl API { /// Returns fully initialized devices for a specific platform. @@ -17,17 +17,17 @@ impl API { /// information. 
pub fn load_devices(platform: &Platform) -> Result, Error> { match API::load_device_list(platform) { - Ok(device_list) => { - Ok( - device_list.iter().map(|device| { - device.clone() - .load_name() - .load_device_type() - .load_compute_units() - }).collect() - ) - }, - Err(err) => Err(err) + Ok(device_list) => Ok(device_list + .iter() + .map(|device| { + device + .clone() + .load_name() + .load_device_type() + .load_compute_units() + }) + .collect()), + Err(err) => Err(err), } } @@ -36,27 +36,45 @@ impl API { let mut num_devices = 0; // load how many devices are available - unsafe { API::ffi_get_device_ids(platform.id_c(), cl::CL_DEVICE_TYPE_ALL, 0, ptr::null_mut(), &mut num_devices) }?; + unsafe { + API::ffi_get_device_ids( + platform.id_c(), + cl::CL_DEVICE_TYPE_ALL, + 0, + ptr::null_mut(), + &mut num_devices, + ) + }?; // prepare device id list - let mut ids: Vec = repeat(0 as cl::device_id).take(num_devices as usize).collect(); + let mut ids: Vec = repeat(0 as cl::device_id) + .take(num_devices as usize) + .collect(); // load the specific devices - unsafe { API::ffi_get_device_ids(platform.id_c(), cl::CL_DEVICE_TYPE_ALL, ids.len() as cl::uint, ids.as_mut_ptr(), ptr::null_mut()) }?; + unsafe { + API::ffi_get_device_ids( + platform.id_c(), + cl::CL_DEVICE_TYPE_ALL, + ids.len() as cl::uint, + ids.as_mut_ptr(), + ptr::null_mut(), + ) + }?; - Ok(ids.iter().map(|id| Device::from_c(*id) ).collect()) + Ok(ids.iter().map(|id| Device::from_c(*id)).collect()) } /// Returns the requested DeviceInfo for the provided device. pub fn load_device_info(device: &Device, info: cl::device_info) -> Result { let mut size = 0; - unsafe {API::ffi_get_device_info(device.id_c(), info, 0, ptr::null_mut(), &mut size)}?; + unsafe { API::ffi_get_device_info(device.id_c(), info, 0, ptr::null_mut(), &mut size) }?; let mut buf: Vec = repeat(0u8).take(size).collect(); let buf_ptr = buf.as_mut_ptr() as *mut libc::c_void; - unsafe {API::ffi_get_device_info(device.id_c(), info, size, buf_ptr, ptr::null_mut())}?; + unsafe { API::ffi_get_device_info(device.id_c(), info, size, buf_ptr, ptr::null_mut()) }?; Ok(DeviceInfo::new(buf)) } @@ -66,16 +84,26 @@ impl API { device_type: cl::device_type, num_entries: cl::uint, devices: *mut cl::device_id, - num_devices: *mut cl::uint + num_devices: *mut cl::uint, ) -> Result<(), Error> { match clGetDeviceIDs(platform, device_type, num_entries, devices, num_devices) { cl::Status::SUCCESS => Ok(()), - cl::Status::INVALID_PLATFORM => Err(Error::InvalidPlatform("`platform` is not a valid platform")), - cl::Status::INVALID_DEVICE_TYPE => Err(Error::InvalidDeviceType("`device type` is not a valid device type")), - cl::Status::DEVICE_NOT_FOUND => Err(Error::DeviceNotFound("no devices for `device type` found")), - cl::Status::OUT_OF_RESOURCES => Err(Error::OutOfResources("Failure to allocate resources on the device")), - cl::Status::OUT_OF_HOST_MEMORY => Err(Error::OutOfHostMemory("Failure to allocate resources on the host")), - _ => Err(Error::Other("Unable to get device ids")) + cl::Status::INVALID_PLATFORM => { + Err(Error::InvalidPlatform("`platform` is not a valid platform")) + } + cl::Status::INVALID_DEVICE_TYPE => Err(Error::InvalidDeviceType( + "`device type` is not a valid device type", + )), + cl::Status::DEVICE_NOT_FOUND => { + Err(Error::DeviceNotFound("no devices for `device type` found")) + } + cl::Status::OUT_OF_RESOURCES => Err(Error::OutOfResources( + "Failure to allocate resources on the device", + )), + cl::Status::OUT_OF_HOST_MEMORY => Err(Error::OutOfHostMemory( + "Failure 
to allocate resources on the host", + )), + _ => Err(Error::Other("Unable to get device ids")), } } @@ -84,14 +112,26 @@ impl API { param_name: cl::device_info, param_value_size: libc::size_t, param_value: *mut libc::c_void, - param_value_size_ret: *mut libc::size_t + param_value_size_ret: *mut libc::size_t, ) -> Result<(), Error> { - match clGetDeviceInfo(device, param_name, param_value_size, param_value, param_value_size_ret) { + match clGetDeviceInfo( + device, + param_name, + param_value_size, + param_value, + param_value_size_ret, + ) { cl::Status::SUCCESS => Ok(()), - cl::Status::INVALID_VALUE => Err(Error::InvalidValue("`param_name` is not one of the supported values")), - cl::Status::OUT_OF_RESOURCES => Err(Error::OutOfResources("Failure to allocate resources on the device")), - cl::Status::OUT_OF_HOST_MEMORY => Err(Error::OutOfHostMemory("Failure to allocate resources on the host")), - _ => Err(Error::Other("Unable to get device info string length")) + cl::Status::INVALID_VALUE => Err(Error::InvalidValue( + "`param_name` is not one of the supported values", + )), + cl::Status::OUT_OF_RESOURCES => Err(Error::OutOfResources( + "Failure to allocate resources on the device", + )), + cl::Status::OUT_OF_HOST_MEMORY => Err(Error::OutOfHostMemory( + "Failure to allocate resources on the host", + )), + _ => Err(Error::Other("Unable to get device info string length")), } } } diff --git a/coaster/src/frameworks/opencl/api/error.rs b/coaster/src/frameworks/opencl/api/error.rs index b0e67724d..f5fc8c690 100644 --- a/coaster/src/frameworks/opencl/api/error.rs +++ b/coaster/src/frameworks/opencl/api/error.rs @@ -1,6 +1,6 @@ //! Provides Rust Errors for OpenCL's status. -use std::{fmt, error}; +use std::{error, fmt}; #[derive(Debug, Copy, Clone, PartialEq, Eq)] /// Defines OpenCL errors. diff --git a/coaster/src/frameworks/opencl/api/ffi.rs b/coaster/src/frameworks/opencl/api/ffi.rs index 44c72bb85..62a531c95 100644 --- a/coaster/src/frameworks/opencl/api/ffi.rs +++ b/coaster/src/frameworks/opencl/api/ffi.rs @@ -1,27 +1,31 @@ //! Provides the Foreign Function Interface for OpenCL. 
#![allow(non_camel_case_types, dead_code)] -#![allow(missing_docs, missing_debug_implementations, missing_copy_implementations)] +#![allow( + missing_docs, + missing_debug_implementations, + missing_copy_implementations +)] -use libc; use super::types as cl; +use libc; #[cfg_attr(target_os = "macos", link(name = "OpenCL", kind = "framework"))] #[cfg_attr(not(target_os = "macos"), link(name = "OpenCL"))] -extern -{ +extern "C" { /* Platform APIs */ pub fn clGetPlatformIDs( num_entries: cl::uint, platforms: *mut cl::platform_id, - num_platforms: *mut cl::uint) -> cl::Status; + num_platforms: *mut cl::uint, + ) -> cl::Status; pub fn clGetPlatformInfo( platform: cl::platform_id, param_name: cl::platform_info, param_value_size: libc::size_t, param_value: *mut libc::c_void, - param_value_size_ret: *mut libc::size_t) -> cl::Status; - + param_value_size_ret: *mut libc::size_t, + ) -> cl::Status; /* Device APIs */ pub fn clGetDeviceIDs( @@ -29,30 +33,44 @@ extern device_type: cl::device_type, num_entries: cl::uint, devices: *mut cl::device_id, - num_devices: *mut cl::uint) -> cl::Status; + num_devices: *mut cl::uint, + ) -> cl::Status; pub fn clGetDeviceInfo( device: cl::device_id, param_name: cl::device_info, param_value_size: libc::size_t, param_value: *mut libc::c_void, - param_value_size_ret: *mut libc::size_t) -> cl::Status; + param_value_size_ret: *mut libc::size_t, + ) -> cl::Status; /* Context APIs */ pub fn clCreateContext( properties: *const cl::context_properties, num_devices: cl::uint, devices: *const cl::device_id, - pfn_notify: extern fn (*const libc::c_char, *const libc::c_void, libc::size_t, *mut libc::c_void), + pfn_notify: extern "C" fn( + *const libc::c_char, + *const libc::c_void, + libc::size_t, + *mut libc::c_void, + ), user_data: *mut libc::c_void, - errcode_ret: *mut cl::int) -> cl::context_id; + errcode_ret: *mut cl::int, + ) -> cl::context_id; pub fn clCreateContextFromType( properties: *mut cl::context_properties, device_type: cl::device_type, - pfn_notify: extern fn (*mut libc::c_char, *mut libc::c_void, libc::size_t, *mut libc::c_void), + pfn_notify: extern "C" fn( + *mut libc::c_char, + *mut libc::c_void, + libc::size_t, + *mut libc::c_void, + ), user_data: *mut libc::c_void, - errcode_ret: *mut cl::int) -> cl::context_id; + errcode_ret: *mut cl::int, + ) -> cl::context_id; pub fn clRetainContext(context: cl::context_id) -> cl::Status; @@ -63,14 +81,16 @@ extern param_name: cl::context_info, param_value_size: libc::size_t, param_value: *mut libc::c_void, - param_value_size_ret: *mut libc::size_t) -> cl::Status; + param_value_size_ret: *mut libc::size_t, + ) -> cl::Status; /* Command Queue APIs */ pub fn clCreateCommandQueue( context: cl::context_id, device: cl::device_id, properties: cl::command_queue_properties, - errcode_ret: *mut cl::int) -> cl::queue_id; + errcode_ret: *mut cl::int, + ) -> cl::queue_id; pub fn clRetainCommandQueue(command_queue: cl::queue_id) -> cl::Status; @@ -81,7 +101,8 @@ extern param_name: cl::command_queue_info, param_value_size: libc::size_t, param_value: *mut libc::c_void, - param_value_size_ret: *mut libc::size_t) -> cl::Status; + param_value_size_ret: *mut libc::size_t, + ) -> cl::Status; /* Memory Object APIs */ pub fn clCreateBuffer( @@ -89,14 +110,16 @@ extern flags: cl::mem_flags, size: libc::size_t, host_ptr: *mut libc::c_void, - errcode_ret: *mut cl::int) -> cl::memory_id; + errcode_ret: *mut cl::int, + ) -> cl::memory_id; pub fn clCreateSubBuffer( buffer: cl::memory_id, flags: cl::mem_flags, buffer_create_type: 
cl::buffer_create_type, buffer_create_info: *mut libc::c_void, - errcode_ret: *mut cl::int) -> cl::memory_id; + errcode_ret: *mut cl::int, + ) -> cl::memory_id; pub fn clCreateImage2D( context: cl::context_id, @@ -106,7 +129,8 @@ extern image_height: libc::size_t, image_row_pitch: libc::size_t, host_ptr: *mut libc::c_void, - errcode_ret: *mut cl::int) -> cl::memory_id; + errcode_ret: *mut cl::int, + ) -> cl::memory_id; pub fn clCreateImage3D( context: cl::context_id, @@ -120,7 +144,8 @@ extern image_row_pitch: libc::size_t, image_slice_pitch: libc::size_t, host_ptr: *mut libc::c_void, - errcode_ret: *mut cl::int) -> cl::memory_id; + errcode_ret: *mut cl::int, + ) -> cl::memory_id; pub fn clRetainMemObject(memobj: cl::memory_id) -> cl::Status; @@ -132,27 +157,30 @@ extern image_type: cl::mem_object_type, num_entries: cl::uint, image_formats: *mut cl::image_format, - num_image_formats: *mut cl::uint) -> cl::Status; + num_image_formats: *mut cl::uint, + ) -> cl::Status; pub fn clGetMemObjectInfo( memobj: cl::memory_id, param_name: cl::mem_info, param_value_size: libc::size_t, param_value: *mut libc::c_void, - param_value_size_ret: *mut libc::size_t) -> cl::Status; + param_value_size_ret: *mut libc::size_t, + ) -> cl::Status; pub fn clGetImageInfo( image: cl::memory_id, param_name: cl::image_info, param_value_size: libc::size_t, param_value: *mut libc::c_void, - param_value_size_ret: *mut libc::size_t) -> cl::Status; + param_value_size_ret: *mut libc::size_t, + ) -> cl::Status; pub fn clSetMemObjectDestructorCallback( memobj: cl::memory_id, - pfn_notify: extern fn (cl::memory_id, *mut libc::c_void), - user_data: *mut libc::c_void) -> cl::Status; - + pfn_notify: extern "C" fn(cl::memory_id, *mut libc::c_void), + user_data: *mut libc::c_void, + ) -> cl::Status; /*mut * Sampler APIs */ pub fn clCreateSampler( @@ -160,19 +188,20 @@ extern normalize_coords: cl::boolean, addressing_mode: cl::addressing_mode, filter_mode: cl::filter_mode, - errcode_ret: *mut cl::int) -> cl::sampler; + errcode_ret: *mut cl::int, + ) -> cl::sampler; pub fn clRetainSampler(sampler: cl::sampler) -> cl::Status; - pub fn clReleaseSampler(sampler: cl::sampler) ->cl::int; + pub fn clReleaseSampler(sampler: cl::sampler) -> cl::int; pub fn clGetSamplerInfo( sampler: cl::sampler, param_name: cl::sampler_info, param_value_size: libc::size_t, param_value: *mut libc::c_void, - param_value_size_ret: *mut libc::size_t) -> cl::Status; - + param_value_size_ret: *mut libc::size_t, + ) -> cl::Status; /* Program Object APIs */ pub fn clCreateProgramWithSource( @@ -180,7 +209,8 @@ extern count: cl::uint, strings: *const *const libc::c_char, lengths: *const libc::size_t, - errcode_ret: *mut cl::int) -> cl::program; + errcode_ret: *mut cl::int, + ) -> cl::program; pub fn clCreateProgramWithBinary( context: cl::context_id, @@ -189,7 +219,8 @@ extern lengths: *const libc::size_t, binaries: *const *const libc::c_uchar, binary_status: *mut cl::int, - errcode_ret: *mut cl::int) -> cl::program; + errcode_ret: *mut cl::int, + ) -> cl::program; pub fn clRetainProgram(program: cl::program) -> cl::Status; @@ -200,8 +231,9 @@ extern num_devices: cl::uint, device_list: *const cl::device_id, options: *const libc::c_char, - pfn_notify: extern fn (cl::program, *mut libc::c_void), - user_data: *mut libc::c_void) -> cl::Status; + pfn_notify: extern "C" fn(cl::program, *mut libc::c_void), + user_data: *mut libc::c_void, + ) -> cl::Status; pub fn clUnloadCompiler() -> cl::Status; @@ -210,7 +242,8 @@ extern param_name: cl::program_info, param_value_size: 
libc::size_t, param_value: *mut libc::c_void, - param_value_size_ret: *mut libc::size_t) -> cl::Status; + param_value_size_ret: *mut libc::size_t, + ) -> cl::Status; pub fn clGetProgramBuildInfo( program: cl::program, @@ -218,19 +251,22 @@ extern param_name: cl::program_info, param_value_size: libc::size_t, param_value: *mut libc::c_void, - param_value_size_ret: *mut libc::size_t) -> cl::Status; + param_value_size_ret: *mut libc::size_t, + ) -> cl::Status; /* Kernel Object APIs */ pub fn clCreateKernel( program: cl::program, kernel_name: *const libc::c_char, - errcode_ret: *mut cl::int) -> cl::kernel_id; + errcode_ret: *mut cl::int, + ) -> cl::kernel_id; pub fn clCreateKernelsInProgram( program: cl::program, num_kernels: cl::uint, kernels: *mut cl::kernel_id, - num_kernels_ret: *mut cl::uint) -> cl::Status; + num_kernels_ret: *mut cl::uint, + ) -> cl::Status; pub fn clRetainKernel(kernel: cl::kernel_id) -> cl::Status; @@ -240,14 +276,16 @@ extern kernel: cl::kernel_id, arg_index: cl::uint, arg_size: libc::size_t, - arg_value: *const libc::c_void) -> cl::Status; + arg_value: *const libc::c_void, + ) -> cl::Status; pub fn clGetKernelInfo( kernel: cl::kernel_id, param_name: cl::kernel_info, param_value_size: libc::size_t, param_value: *mut libc::c_void, - param_value_size_ret: *mut libc::size_t) -> cl::Status; + param_value_size_ret: *mut libc::size_t, + ) -> cl::Status; pub fn clGetKernelWorkGroupInfo( kernel: cl::kernel_id, @@ -255,39 +293,34 @@ extern param_name: cl::kernel_work_group_info, param_value_size: libc::size_t, param_value: *mut libc::c_void, - param_value_size_ret: *mut libc::size_t) -> cl::Status; - + param_value_size_ret: *mut libc::size_t, + ) -> cl::Status; /* Event Object APIs */ - pub fn clWaitForEvents( - num_events: cl::uint, - event_list: *const cl::event) -> cl::Status; + pub fn clWaitForEvents(num_events: cl::uint, event_list: *const cl::event) -> cl::Status; pub fn clGetEventInfo( event: cl::event, param_name: cl::event_info, param_value_size: libc::size_t, param_value: *mut libc::c_void, - param_value_size_ret: *mut libc::size_t) -> cl::Status; + param_value_size_ret: *mut libc::size_t, + ) -> cl::Status; - pub fn clCreateUserEvent( - context: cl::context_id, - errcode_ret: *mut cl::int) -> cl::event; + pub fn clCreateUserEvent(context: cl::context_id, errcode_ret: *mut cl::int) -> cl::event; pub fn clRetainEvent(event: cl::event) -> cl::Status; pub fn clReleaseEvent(event: cl::event) -> cl::Status; - pub fn clSetUserEventStatus( - event: cl::event, - execution_status: cl::int) -> cl::Status; + pub fn clSetUserEventStatus(event: cl::event, execution_status: cl::int) -> cl::Status; pub fn clSetEventCallback( event: cl::event, command_exec_callback_type: cl::int, - pfn_notify: extern fn (cl::event, cl::int, *mut libc::c_void), - user_data: *mut libc::c_void) -> cl::Status; - + pfn_notify: extern "C" fn(cl::event, cl::int, *mut libc::c_void), + user_data: *mut libc::c_void, + ) -> cl::Status; /* Profiling APIs */ pub fn clGetEventProfilingInfo( @@ -295,8 +328,8 @@ extern param_name: cl::profiling_info, param_value_size: libc::size_t, param_value: *mut libc::c_void, - param_value_size_ret: *mut libc::size_t) -> cl::Status; - + param_value_size_ret: *mut libc::size_t, + ) -> cl::Status; /* Flush and Finish APIs */ pub fn clFlush(command_queue: cl::queue_id) -> cl::Status; @@ -313,7 +346,8 @@ extern ptr: *mut libc::c_void, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event) -> cl::Status; + event: *mut cl::event, + ) -> 
cl::Status; pub fn clEnqueueReadBufferRect( command_queue: cl::queue_id, @@ -329,7 +363,8 @@ extern ptr: *mut libc::c_void, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event) -> cl::Status; + event: *mut cl::event, + ) -> cl::Status; pub fn clEnqueueWriteBuffer( command_queue: cl::queue_id, @@ -340,7 +375,8 @@ extern ptr: *const libc::c_void, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event) -> cl::Status; + event: *mut cl::event, + ) -> cl::Status; pub fn clEnqueueWriteBufferRect( command_queue: cl::queue_id, @@ -355,7 +391,8 @@ extern ptr: *mut libc::c_void, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event) -> cl::Status; + event: *mut cl::event, + ) -> cl::Status; pub fn clEnqueueCopyBuffer( command_queue: cl::queue_id, @@ -366,7 +403,8 @@ extern cb: libc::size_t, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event) -> cl::Status; + event: *mut cl::event, + ) -> cl::Status; pub fn clEnqueueCopyBufferRect( command_queue: cl::queue_id, @@ -381,7 +419,8 @@ extern dst_slice_pitch: libc::size_t, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event) -> cl::Status; + event: *mut cl::event, + ) -> cl::Status; pub fn clEnqueueReadImage( command_queue: cl::queue_id, @@ -394,7 +433,8 @@ extern ptr: *mut libc::c_void, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event) -> cl::Status; + event: *mut cl::event, + ) -> cl::Status; pub fn clEnqueueWriteImage( command_queue: cl::queue_id, @@ -407,7 +447,8 @@ extern ptr: *mut libc::c_void, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event) -> cl::Status; + event: *mut cl::event, + ) -> cl::Status; pub fn clEnqueueCopyImage( command_queue: cl::queue_id, @@ -418,7 +459,8 @@ extern region: *mut libc::size_t, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event) -> cl::Status; + event: *mut cl::event, + ) -> cl::Status; pub fn clEnqueueCopyImageToBuffer( command_queue: cl::queue_id, @@ -429,7 +471,8 @@ extern dst_offset: libc::size_t, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event) -> cl::Status; + event: *mut cl::event, + ) -> cl::Status; pub fn clEnqueueCopyBufferToImage( command_queue: cl::queue_id, @@ -440,7 +483,8 @@ extern region: *mut libc::size_t, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event) -> cl::Status; + event: *mut cl::event, + ) -> cl::Status; pub fn clEnqueueMapBuffer( command_queue: cl::queue_id, @@ -452,7 +496,8 @@ extern num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, event: *mut cl::event, - errorcode_ret: *mut cl::int); + errorcode_ret: *mut cl::int, + ); pub fn clEnqueueMapImage( command_queue: cl::queue_id, @@ -466,7 +511,8 @@ extern num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, event: *mut cl::event, - errorcode_ret: *mut cl::int); + errorcode_ret: *mut cl::int, + ); pub fn clEnqueueUnmapMemObject( command_queue: cl::queue_id, @@ -474,7 +520,8 @@ extern mapped_ptr: *mut libc::c_void, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event) -> cl::Status; + event: *mut cl::event, + ) -> cl::Status; pub fn clEnqueueNDRangeKernel( command_queue: cl::queue_id, @@ -485,18 +532,20 @@ extern local_work_size: *const 
libc::size_t, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event) -> cl::Status; + event: *mut cl::event, + ) -> cl::Status; pub fn clEnqueueTask( command_queue: cl::queue_id, kernel: cl::kernel_id, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event) -> cl::Status; + event: *mut cl::event, + ) -> cl::Status; pub fn clEnqueueNativeKernel( command_queue: cl::queue_id, - user_func: extern fn (*mut libc::c_void), + user_func: extern "C" fn(*mut libc::c_void), args: *mut libc::c_void, cb_args: libc::size_t, num_mem_objects: cl::uint, @@ -504,16 +553,16 @@ extern args_mem_loc: *const *const libc::c_void, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event) -> cl::Status; + event: *mut cl::event, + ) -> cl::Status; - pub fn clEnqueueMarker( - command_queue: cl::queue_id, - event: *mut cl::event) -> cl::Status; + pub fn clEnqueueMarker(command_queue: cl::queue_id, event: *mut cl::event) -> cl::Status; pub fn clEnqueueWaitForEvents( command_queue: cl::queue_id, num_events: cl::uint, - event_list: *mut cl::event) -> cl::Status; + event_list: *mut cl::event, + ) -> cl::Status; pub fn clEnqueueBarrier(command_queue: cl::queue_id) -> cl::Status; diff --git a/coaster/src/frameworks/opencl/api/memory.rs index 169287615..d785bb10d 100644 --- a/coaster/src/frameworks/opencl/api/memory.rs +++ b/coaster/src/frameworks/opencl/api/memory.rs @@ -2,11 +2,11 @@ //! //! At Coaster device can be understood as a synonym to OpenCL's context. -use std::ptr; -use libc; -use frameworks::opencl::{API, Error, Event, Context, Memory, MemoryFlags, Queue}; -use super::types as cl; use super::ffi::*; +use super::types as cl; +use frameworks::opencl::{Context, Error, Event, Memory, MemoryFlags, Queue, API}; +use libc; +use std::ptr; impl API { /// Allocates memory on the OpenCL device. /// /// object can be a scalar data type (such as an int, float), vector data type, or a /// user-defined structure. /// Returns a memory id for the created buffer, which can now be written to. - pub fn create_buffer(context: &Context, flags: MemoryFlags, size: usize, host_pointer: Option<*mut u8>) -> Result { + pub fn create_buffer( + context: &Context, + flags: MemoryFlags, + size: usize, + host_pointer: Option<*mut u8>, + ) -> Result { let host_ptr = host_pointer.unwrap_or(ptr::null_mut()); Ok(Memory::from_c(unsafe { - API::ffi_create_buffer(context.id() as *mut libc::c_void, flags.bits(), size, host_ptr as *mut libc::c_void) + API::ffi_create_buffer( + context.id() as *mut libc::c_void, + flags.bits(), + size, + host_ptr as *mut libc::c_void, + ) }?)) } /// Releases allocated memory from the OpenCL device. pub fn release_memory(memory: &mut Memory) -> Result<(), Error> { - Ok(unsafe {API::ffi_release_mem_object(memory.id_c())}?) + Ok(unsafe { API::ffi_release_mem_object(memory.id_c()) }?) } /// Reads from an OpenCL memory object to the host memory.
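For orientation, the two wrappers above pair up into the usual allocate/release cycle. A hypothetical usage sketch, assuming an already-created `Context` and a `MemoryFlags` value describing a plain read-write buffer (the flag constructors are outside this patch):

    /// Allocates a 4 KiB device buffer and releases it again; in real code
    /// the buffer would be handed to the enqueue_write_buffer and
    /// enqueue_read_buffer wrappers below in between.
    fn alloc_and_free(context: &Context, flags: MemoryFlags) -> Result<(), Error> {
        // `None` means no host pointer: clCreateBuffer receives NULL and the
        // driver allocates the backing store itself.
        let mut mem = API::create_buffer(context, flags, 4096, None)?;
        API::release_memory(&mut mem)
    }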
@@ -46,18 +56,22 @@ impl API { ptr::null_mut() }; let new_event: cl::event = 0 as *mut libc::c_void; - let res = unsafe {API::ffi_enqueue_read_buffer(queue.id_c(), - mem.id_c(), - blocking_read as cl::boolean, - offset, - size, - host_mem, - num_events_in_wait_list as cl::uint, - event_list, - new_event as *mut cl::event)}; + let res = unsafe { + API::ffi_enqueue_read_buffer( + queue.id_c(), + mem.id_c(), + blocking_read as cl::boolean, + offset, + size, + host_mem, + num_events_in_wait_list as cl::uint, + event_list, + new_event as *mut cl::event, + ) + }; match res { Ok(_) => Ok(Event::from_c(new_event)), - Err(err) => Err(err) + Err(err) => Err(err), } } @@ -80,18 +94,22 @@ impl API { ptr::null_mut() }; let new_event: cl::event = 0 as *mut libc::c_void; - let res = unsafe {API::ffi_enqueue_write_buffer(queue.id_c(), - mem.id_c(), - blocking_write as cl::boolean, - offset, - size, - host_mem, - num_events_in_wait_list as cl::uint, - event_list, - new_event as *mut cl::event)}; + let res = unsafe { + API::ffi_enqueue_write_buffer( + queue.id_c(), + mem.id_c(), + blocking_write as cl::boolean, + offset, + size, + host_mem, + num_events_in_wait_list as cl::uint, + event_list, + new_event as *mut cl::event, + ) + }; match res { Ok(_) => Ok(Event::from_c(new_event)), - Err(err) => Err(err) + Err(err) => Err(err), } } @@ -99,7 +117,7 @@ impl API { context: cl::context_id, flags: cl::mem_flags, size: libc::size_t, - host_ptr: *mut libc::c_void + host_ptr: *mut libc::c_void, ) -> Result { let mut errcode: i32 = 0; let memory_id = clCreateBuffer(context, flags, size, host_ptr, &mut errcode); @@ -119,10 +137,16 @@ impl API { unsafe fn ffi_release_mem_object(memobj: cl::memory_id) -> Result<(), Error> { match clReleaseMemObject(memobj) { cl::Status::SUCCESS => Ok(()), - cl::Status::INVALID_MEM_OBJECT => Err(Error::InvalidMemObject("memobj is not a valid memory object.")), - cl::Status::OUT_OF_RESOURCES => Err(Error::OutOfResources("Failure to allocate resources on the device")), - cl::Status::OUT_OF_HOST_MEMORY => Err(Error::OutOfHostMemory("Failure to allocate resources on the host")), - _ => Err(Error::Other("Unable to release memory object.")) + cl::Status::INVALID_MEM_OBJECT => Err(Error::InvalidMemObject( + "memobj is not a valid memory object.", + )), + cl::Status::OUT_OF_RESOURCES => Err(Error::OutOfResources( + "Failure to allocate resources on the device", + )), + cl::Status::OUT_OF_HOST_MEMORY => Err(Error::OutOfHostMemory( + "Failure to allocate resources on the host", + )), + _ => Err(Error::Other("Unable to release memory object.")), } } @@ -135,7 +159,7 @@ impl API { ptr: *mut libc::c_void, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event + event: *mut cl::event, ) -> Result<(), Error> { match clEnqueueReadBuffer(command_queue, buffer, blocking_read, offset, cb, ptr, num_events_in_wait_list, event_wait_list, event) { cl::Status::SUCCESS => Ok(()), @@ -163,7 +187,7 @@ impl API { ptr: *const libc::c_void, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event + event: *mut cl::event, ) -> Result<(), Error> { match clEnqueueWriteBuffer(command_queue, buffer, blocking_write, offset, cb, ptr, num_events_in_wait_list, event_wait_list, event) { cl::Status::SUCCESS => Ok(()), diff --git a/coaster/src/frameworks/opencl/api/mod.rs b/coaster/src/frameworks/opencl/api/mod.rs index 51e46e1e2..48f967681 100644 --- a/coaster/src/frameworks/opencl/api/mod.rs +++ b/coaster/src/frameworks/opencl/api/mod.rs @@ -6,11 
+6,11 @@ pub use self::error::Error; /// Defines the OpenCL API. pub struct API; -mod error; mod context; mod device; +mod error; +mod ffi; mod memory; mod platform; mod queue; -mod ffi; pub mod types; diff --git a/coaster/src/frameworks/opencl/api/platform.rs b/coaster/src/frameworks/opencl/api/platform.rs index 15f425dc7..0698c8de7 100644 --- a/coaster/src/frameworks/opencl/api/platform.rs +++ b/coaster/src/frameworks/opencl/api/platform.rs @@ -1,11 +1,11 @@ //! Provides the OpenCL API with its platform functionality. -use frameworks::opencl::{API, Error}; -use frameworks::opencl::Platform; -use super::types as cl; use super::ffi::*; -use std::ptr; +use super::types as cl; +use frameworks::opencl::Platform; +use frameworks::opencl::{Error, API}; use std::iter::repeat; +use std::ptr; use std::sync::Mutex; impl API { @@ -27,21 +27,23 @@ impl API { } let guard = PLATFORM_MUTEX.lock(); - unsafe {API::ffi_get_platform_ids(0, ptr::null_mut(), &mut num_platforms)}?; + unsafe { API::ffi_get_platform_ids(0, ptr::null_mut(), &mut num_platforms) }?; - let mut ids: Vec = repeat(0 as cl::device_id).take(num_platforms as usize).collect(); + let mut ids: Vec = repeat(0 as cl::device_id) + .take(num_platforms as usize) + .collect(); - unsafe {API::ffi_get_platform_ids(num_platforms, ids.as_mut_ptr(), &mut num_platforms)}?; + unsafe { API::ffi_get_platform_ids(num_platforms, ids.as_mut_ptr(), &mut num_platforms) }?; let _ = guard; - Ok(ids.iter().map(|id| Platform::from_c(*id) ).collect()) + Ok(ids.iter().map(|id| Platform::from_c(*id)).collect()) } unsafe fn ffi_get_platform_ids( num_entries: cl::uint, platforms: *mut cl::platform_id, - num_platforms: *mut cl::uint + num_platforms: *mut cl::uint, ) -> Result<(), Error> { match clGetPlatformIDs(num_entries, platforms, num_platforms) { cl::Status::SUCCESS => Ok(()), diff --git a/coaster/src/frameworks/opencl/api/queue.rs b/coaster/src/frameworks/opencl/api/queue.rs index 61bfc0970..fe984d0e2 100644 --- a/coaster/src/frameworks/opencl/api/queue.rs +++ b/coaster/src/frameworks/opencl/api/queue.rs @@ -2,9 +2,9 @@ //! //! At Coaster device can be understood as a synonym to OpenCL's context. -use frameworks::opencl::{API, Device, Error, Event, Context, Program, Queue, QueueFlags}; -use super::types as cl; use super::ffi::*; +use super::types as cl; +use frameworks::opencl::{Context, Device, Error, Event, Program, Queue, QueueFlags, API}; use libc; use std::ptr; @@ -13,15 +13,21 @@ impl API { /// /// OpenCL command queues are used to control memory allocation and operations /// for a single device. - pub fn create_queue(context: &Context, device: &Device, queue_flags: &QueueFlags) -> Result { - Ok(Queue::from_c((unsafe { - API::ffi_create_command_queue(context.id_c(), device.id_c(), queue_flags.bits()) - })?)) + pub fn create_queue( + context: &Context, + device: &Device, + queue_flags: &QueueFlags, + ) -> Result { + Ok(Queue::from_c( + (unsafe { + API::ffi_create_command_queue(context.id_c(), device.id_c(), queue_flags.bits()) + })?, + )) } /// Releases command queue from the OpenCL device. pub fn release_queue(queue: &mut Queue) -> Result<(), Error> { - Ok((unsafe {API::ffi_release_command_queue(queue.id_c())})?) + Ok((unsafe { API::ffi_release_command_queue(queue.id_c()) })?) 
} unsafe fn ffi_create_command_queue( @@ -33,24 +39,38 @@ impl API { let queue_id = clCreateCommandQueue(context, device, properties, &mut errcode); match errcode { errcode if errcode == cl::Status::SUCCESS as i32 => Ok(queue_id), - errcode if errcode == cl::Status::INVALID_CONTEXT as i32 => Err(Error::InvalidContext("context is not a valid context.")), - errcode if errcode == cl::Status::INVALID_DEVICE as i32 => Err(Error::InvalidDevice("devices contains an invalid device.")), - errcode if errcode == cl::Status::INVALID_VALUE as i32 => Err(Error::InvalidValue("values specified in flags are not valid")), - errcode if errcode == cl::Status::INVALID_QUEUE_PROPERTIES as i32 => Err(Error::InvalidQueueProperties("values specified in properties are valid but are not supported by the device")), - errcode if errcode == cl::Status::OUT_OF_HOST_MEMORY as i32 => Err(Error::OutOfHostMemory("Failure to allocate resources on the host")), - _ => Err(Error::Other("Unable to create command queue.")) + errcode if errcode == cl::Status::INVALID_CONTEXT as i32 => { + Err(Error::InvalidContext("context is not a valid context.")) + } + errcode if errcode == cl::Status::INVALID_DEVICE as i32 => { + Err(Error::InvalidDevice("devices contains an invalid device.")) + } + errcode if errcode == cl::Status::INVALID_VALUE as i32 => Err(Error::InvalidValue( + "values specified in flags are not valid", + )), + errcode if errcode == cl::Status::INVALID_QUEUE_PROPERTIES as i32 => { + Err(Error::InvalidQueueProperties( + "values specified in properties are valid but are not supported by the device", + )) + } + errcode if errcode == cl::Status::OUT_OF_HOST_MEMORY as i32 => Err( + Error::OutOfHostMemory("Failure to allocate resources on the host"), + ), + _ => Err(Error::Other("Unable to create command queue.")), } } unsafe fn ffi_release_command_queue(command_queue: cl::queue_id) -> Result<(), Error> { match clReleaseCommandQueue(command_queue) { cl::Status::SUCCESS => Ok(()), - cl::Status::INVALID_COMMAND_QUEUE => Err(Error::InvalidCommandQueue("command_queue is not a valid command-queue")), - _ => Err(Error::Other("Unable to release command queue.")) + cl::Status::INVALID_COMMAND_QUEUE => Err(Error::InvalidCommandQueue( + "command_queue is not a valid command-queue", + )), + _ => Err(Error::Other("Unable to release command queue.")), } } - unsafe fn ffi_enqueue_nd_range_kernel ( + unsafe fn ffi_enqueue_nd_range_kernel( command_queue: cl::queue_id, kernel: cl::kernel_id, work_dim: cl::uint, @@ -59,8 +79,8 @@ impl API { local_work_size: *const libc::size_t, num_events_in_wait_list: cl::uint, event_wait_list: *const cl::event, - event: *mut cl::event - ) -> Result<(),Error> { + event: *mut cl::event, + ) -> Result<(), Error> { match clEnqueueNDRangeKernel(command_queue, kernel, work_dim, global_work_offset, global_work_size, local_work_size, num_events_in_wait_list, event_wait_list, event) { cl::Status::SUCCESS => Ok(()), cl::Status::INVALID_COMMAND_QUEUE => Err(Error::InvalidCommandQueue("command_queue is not a valid command-queue")), @@ -88,7 +108,7 @@ impl API { global_work_offset: usize, global_work_size: usize, local_work_size: usize, - event_wait_list: &[Event] + event_wait_list: &[Event], ) -> Result { let num_events_in_wait_list = event_wait_list.len(); let event_list: *const *mut libc::c_void = if event_wait_list.is_empty() { @@ -98,16 +118,19 @@ impl API { }; let new_event: cl::event = 0 as *mut libc::c_void; - unsafe {API::ffi_enqueue_nd_range_kernel( - queue.id_c(), - kernel.id_c(), - work_dim, - 
global_work_offset as *const libc::size_t, - global_work_size as *const libc::size_t, - local_work_size as *const libc::size_t, - num_events_in_wait_list as cl::uint, - event_list, - new_event as *mut cl::event)}?; + unsafe { + API::ffi_enqueue_nd_range_kernel( + queue.id_c(), + kernel.id_c(), + work_dim, + global_work_offset as *const libc::size_t, + global_work_size as *const libc::size_t, + local_work_size as *const libc::size_t, + num_events_in_wait_list as cl::uint, + event_list, + new_event as *mut cl::event, + ) + }?; Ok(Event::from_c(new_event)) } diff --git a/coaster/src/frameworks/opencl/api/types.rs b/coaster/src/frameworks/opencl/api/types.rs index f0e770ba3..4bcb298f6 100644 --- a/coaster/src/frameworks/opencl/api/types.rs +++ b/coaster/src/frameworks/opencl/api/types.rs @@ -1,78 +1,81 @@ //! Provides the Foreign Type Interface for OpenCL. #![allow(non_camel_case_types, dead_code)] -#![allow(missing_docs, missing_debug_implementations, missing_copy_implementations)] +#![allow( + missing_docs, + missing_debug_implementations, + missing_copy_implementations +)] use libc; use std::fmt; /* Opaque types */ -pub type platform_id = *mut libc::c_void; -pub type device_id = *mut libc::c_void; -pub type context_id = *mut libc::c_void; -pub type queue_id = *mut libc::c_void; -pub type memory_id = *mut libc::c_void; -pub type program = *mut libc::c_void; -pub type kernel_id = *mut libc::c_void; -pub type event = *mut libc::c_void; -pub type sampler = *mut libc::c_void; +pub type platform_id = *mut libc::c_void; +pub type device_id = *mut libc::c_void; +pub type context_id = *mut libc::c_void; +pub type queue_id = *mut libc::c_void; +pub type memory_id = *mut libc::c_void; +pub type program = *mut libc::c_void; +pub type kernel_id = *mut libc::c_void; +pub type event = *mut libc::c_void; +pub type sampler = *mut libc::c_void; /* Scalar types */ -pub type short = i16; -pub type ushort = u16; -pub type int = i32; -pub type uint = u32; -pub type long = i64; -pub type ulong = u64; -pub type half = u16; -pub type float = f32; -pub type double = f64; - -pub type boolean = uint; -pub type bitfield = ulong; -pub type device_type = bitfield; -pub type platform_info = uint; -pub type device_info = uint; -pub type device_fp_config = bitfield; -pub type device_mem_cache_type = uint; -pub type device_local_mem_type = uint; -pub type device_exec_capabilities = bitfield; -pub type command_queue_properties = bitfield; - -pub type context_properties = libc::intptr_t; -pub type context_info = uint; -pub type command_queue_info = uint; -pub type channel_order = uint; -pub type channel_type = uint; -pub type mem_flags = bitfield; -pub type mem_object_type = uint; -pub type mem_info = uint; -pub type image_info = uint; -pub type buffer_create_type = uint; -pub type addressing_mode = uint; -pub type filter_mode = uint; -pub type sampler_info = uint; -pub type map_flags = bitfield; -pub type program_info = uint; -pub type program_build_info = uint; -pub type build_status = int; -pub type kernel_info = uint; -pub type kernel_work_group_info = uint; -pub type event_info = uint; -pub type command_type = uint; -pub type profiling_info = uint; +pub type short = i16; +pub type ushort = u16; +pub type int = i32; +pub type uint = u32; +pub type long = i64; +pub type ulong = u64; +pub type half = u16; +pub type float = f32; +pub type double = f64; + +pub type boolean = uint; +pub type bitfield = ulong; +pub type device_type = bitfield; +pub type platform_info = uint; +pub type device_info = uint; +pub type 
device_fp_config = bitfield; +pub type device_mem_cache_type = uint; +pub type device_local_mem_type = uint; +pub type device_exec_capabilities = bitfield; +pub type command_queue_properties = bitfield; + +pub type context_properties = libc::intptr_t; +pub type context_info = uint; +pub type command_queue_info = uint; +pub type channel_order = uint; +pub type channel_type = uint; +pub type mem_flags = bitfield; +pub type mem_object_type = uint; +pub type mem_info = uint; +pub type image_info = uint; +pub type buffer_create_type = uint; +pub type addressing_mode = uint; +pub type filter_mode = uint; +pub type sampler_info = uint; +pub type map_flags = bitfield; +pub type program_info = uint; +pub type program_build_info = uint; +pub type build_status = int; +pub type kernel_info = uint; +pub type kernel_work_group_info = uint; +pub type event_info = uint; +pub type command_type = uint; +pub type profiling_info = uint; #[repr(C)] pub struct image_format { - image_channel_order: channel_order, - image_channel_data_type: channel_type + image_channel_order: channel_order, + image_channel_data_type: channel_type, } pub struct buffer_region { - origin: libc::size_t, - size: libc::size_t + origin: libc::size_t, + size: libc::size_t, } - enum_from_primitive! { /// OpenCL error codes. #[derive(PartialEq, Debug)] @@ -139,19 +142,19 @@ impl fmt::Display for Status { } /* OpenCL Version */ -pub static CL_VERSION_1_0: boolean = 1; -pub static CL_VERSION_1_1: boolean = 1; +pub static CL_VERSION_1_0: boolean = 1; +pub static CL_VERSION_1_1: boolean = 1; /* cl_bool */ -pub static CL_FALSE: boolean = 0; -pub static CL_TRUE: boolean = 1; +pub static CL_FALSE: boolean = 0; +pub static CL_TRUE: boolean = 1; /* cl_platform_info */ -pub static CL_PLATFORM_PROFILE: uint = 0x0900; -pub static CL_PLATFORM_VERSION: uint = 0x0901; -pub static CL_PLATFORM_NAME: uint = 0x0902; -pub static CL_PLATFORM_VENDOR: uint = 0x0903; -pub static CL_PLATFORM_EXTENSIONS: uint = 0x0904; +pub static CL_PLATFORM_PROFILE: uint = 0x0900; +pub static CL_PLATFORM_VERSION: uint = 0x0901; +pub static CL_PLATFORM_NAME: uint = 0x0902; +pub static CL_PLATFORM_VENDOR: uint = 0x0903; +pub static CL_PLATFORM_EXTENSIONS: uint = 0x0904; /* cl_device_type - bitfield */ pub enum DeviceType { @@ -163,286 +166,286 @@ pub enum DeviceType { } pub static DEVICE_TYPE: [DeviceType; 5] = [ DeviceType::DEFAULT(1), - DeviceType::CPU(1<<1), - DeviceType::GPU(1<<2), - DeviceType::ACCELERATOR(1<<3), - DeviceType::ALL(0xFFFFFFFF) + DeviceType::CPU(1 << 1), + DeviceType::GPU(1 << 2), + DeviceType::ACCELERATOR(1 << 3), + DeviceType::ALL(0xFFFFFFFF), ]; -pub const CL_DEVICE_TYPE_DEFAULT: bitfield = 1; -pub const CL_DEVICE_TYPE_CPU: bitfield = 1 << 1; -pub const CL_DEVICE_TYPE_GPU: bitfield = 1 << 2; -pub const CL_DEVICE_TYPE_ACCELERATOR: bitfield = 1 << 3; -pub const CL_DEVICE_TYPE_CUSTOM: bitfield = 1 << 4; -pub static CL_DEVICE_TYPE_ALL: bitfield = 0xFFFFFFFF; +pub const CL_DEVICE_TYPE_DEFAULT: bitfield = 1; +pub const CL_DEVICE_TYPE_CPU: bitfield = 1 << 1; +pub const CL_DEVICE_TYPE_GPU: bitfield = 1 << 2; +pub const CL_DEVICE_TYPE_ACCELERATOR: bitfield = 1 << 3; +pub const CL_DEVICE_TYPE_CUSTOM: bitfield = 1 << 4; +pub static CL_DEVICE_TYPE_ALL: bitfield = 0xFFFFFFFF; /* cl_device_info */ -pub static CL_DEVICE_TYPE: uint = 0x1000; -pub static CL_DEVICE_VENDOR_ID: uint = 0x1001; -pub static CL_DEVICE_MAX_COMPUTE_UNITS: uint = 0x1002; -pub static CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: uint = 0x1003; -pub static CL_DEVICE_MAX_WORK_GROUP_SIZE: uint = 0x1004; -pub static 
CL_DEVICE_MAX_WORK_ITEM_SIZES: uint = 0x1005; -pub static CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: uint = 0x1006; -pub static CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: uint = 0x1007; -pub static CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: uint = 0x1008; -pub static CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: uint = 0x1009; -pub static CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: uint = 0x100A; -pub static CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: uint = 0x100B; -pub static CL_DEVICE_MAX_CLOCK_FREQUENCY: uint = 0x100C; -pub static CL_DEVICE_ADDRESS_BITS: uint = 0x100D; -pub static CL_DEVICE_MAX_READ_IMAGE_ARGS: uint = 0x100E; -pub static CL_DEVICE_MAX_WRITE_IMAGE_ARGS: uint = 0x100F; -pub static CL_DEVICE_MAX_MEM_ALLOC_SIZE: uint = 0x1010; -pub static CL_DEVICE_IMAGE2D_MAX_WIDTH: uint = 0x1011; -pub static CL_DEVICE_IMAGE2D_MAX_HEIGHT: uint = 0x1012; -pub static CL_DEVICE_IMAGE3D_MAX_WIDTH: uint = 0x1013; -pub static CL_DEVICE_IMAGE3D_MAX_HEIGHT: uint = 0x1014; -pub static CL_DEVICE_IMAGE3D_MAX_DEPTH: uint = 0x1015; -pub static CL_DEVICE_IMAGE_SUPPORT: uint = 0x1016; -pub static CL_DEVICE_MAX_PARAMETER_SIZE: uint = 0x1017; -pub static CL_DEVICE_MAX_SAMPLERS: uint = 0x1018; -pub static CL_DEVICE_MEM_BASE_ADDR_ALIGN: uint = 0x1019; -pub static CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: uint = 0x101A; -pub static CL_DEVICE_SINGLE_FP_CONFIG: uint = 0x101B; -pub static CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: uint = 0x101C; -pub static CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: uint = 0x101D; -pub static CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: uint = 0x101E; -pub static CL_DEVICE_GLOBAL_MEM_SIZE: uint = 0x101F; -pub static CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: uint = 0x1020; -pub static CL_DEVICE_MAX_CONSTANT_ARGS: uint = 0x1021; -pub static CL_DEVICE_LOCAL_MEM_TYPE: uint = 0x1022; -pub static CL_DEVICE_LOCAL_MEM_SIZE: uint = 0x1023; -pub static CL_DEVICE_ERROR_CORRECTION_SUPPORT: uint = 0x1024; -pub static CL_DEVICE_PROFILING_TIMER_RESOLUTION: uint = 0x1025; -pub static CL_DEVICE_ENDIAN_LITTLE: uint = 0x1026; -pub static CL_DEVICE_AVAILABLE: uint = 0x1027; -pub static CL_DEVICE_COMPILER_AVAILABLE: uint = 0x1028; -pub static CL_DEVICE_EXECUTION_CAPABILITIES: uint = 0x1029; -pub static CL_DEVICE_QUEUE_PROPERTIES: uint = 0x102A; -pub const CL_DEVICE_NAME: uint = 0x102B; -pub static CL_DEVICE_VENDOR: uint = 0x102C; -pub static CL_DRIVER_VERSION: uint = 0x102D; -pub static CL_DEVICE_PROFILE: uint = 0x102E; -pub static CL_DEVICE_VERSION: uint = 0x102F; -pub static CL_DEVICE_EXTENSIONS: uint = 0x1030; -pub static CL_DEVICE_PLATFORM: uint = 0x1031; +pub static CL_DEVICE_TYPE: uint = 0x1000; +pub static CL_DEVICE_VENDOR_ID: uint = 0x1001; +pub static CL_DEVICE_MAX_COMPUTE_UNITS: uint = 0x1002; +pub static CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: uint = 0x1003; +pub static CL_DEVICE_MAX_WORK_GROUP_SIZE: uint = 0x1004; +pub static CL_DEVICE_MAX_WORK_ITEM_SIZES: uint = 0x1005; +pub static CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR: uint = 0x1006; +pub static CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT: uint = 0x1007; +pub static CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT: uint = 0x1008; +pub static CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG: uint = 0x1009; +pub static CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT: uint = 0x100A; +pub static CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE: uint = 0x100B; +pub static CL_DEVICE_MAX_CLOCK_FREQUENCY: uint = 0x100C; +pub static CL_DEVICE_ADDRESS_BITS: uint = 0x100D; +pub static CL_DEVICE_MAX_READ_IMAGE_ARGS: uint = 0x100E; +pub static CL_DEVICE_MAX_WRITE_IMAGE_ARGS: uint = 0x100F; +pub static CL_DEVICE_MAX_MEM_ALLOC_SIZE: uint = 0x1010; +pub static 
CL_DEVICE_IMAGE2D_MAX_WIDTH: uint = 0x1011; +pub static CL_DEVICE_IMAGE2D_MAX_HEIGHT: uint = 0x1012; +pub static CL_DEVICE_IMAGE3D_MAX_WIDTH: uint = 0x1013; +pub static CL_DEVICE_IMAGE3D_MAX_HEIGHT: uint = 0x1014; +pub static CL_DEVICE_IMAGE3D_MAX_DEPTH: uint = 0x1015; +pub static CL_DEVICE_IMAGE_SUPPORT: uint = 0x1016; +pub static CL_DEVICE_MAX_PARAMETER_SIZE: uint = 0x1017; +pub static CL_DEVICE_MAX_SAMPLERS: uint = 0x1018; +pub static CL_DEVICE_MEM_BASE_ADDR_ALIGN: uint = 0x1019; +pub static CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE: uint = 0x101A; +pub static CL_DEVICE_SINGLE_FP_CONFIG: uint = 0x101B; +pub static CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: uint = 0x101C; +pub static CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE: uint = 0x101D; +pub static CL_DEVICE_GLOBAL_MEM_CACHE_SIZE: uint = 0x101E; +pub static CL_DEVICE_GLOBAL_MEM_SIZE: uint = 0x101F; +pub static CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: uint = 0x1020; +pub static CL_DEVICE_MAX_CONSTANT_ARGS: uint = 0x1021; +pub static CL_DEVICE_LOCAL_MEM_TYPE: uint = 0x1022; +pub static CL_DEVICE_LOCAL_MEM_SIZE: uint = 0x1023; +pub static CL_DEVICE_ERROR_CORRECTION_SUPPORT: uint = 0x1024; +pub static CL_DEVICE_PROFILING_TIMER_RESOLUTION: uint = 0x1025; +pub static CL_DEVICE_ENDIAN_LITTLE: uint = 0x1026; +pub static CL_DEVICE_AVAILABLE: uint = 0x1027; +pub static CL_DEVICE_COMPILER_AVAILABLE: uint = 0x1028; +pub static CL_DEVICE_EXECUTION_CAPABILITIES: uint = 0x1029; +pub static CL_DEVICE_QUEUE_PROPERTIES: uint = 0x102A; +pub const CL_DEVICE_NAME: uint = 0x102B; +pub static CL_DEVICE_VENDOR: uint = 0x102C; +pub static CL_DRIVER_VERSION: uint = 0x102D; +pub static CL_DEVICE_PROFILE: uint = 0x102E; +pub static CL_DEVICE_VERSION: uint = 0x102F; +pub static CL_DEVICE_EXTENSIONS: uint = 0x1030; +pub static CL_DEVICE_PLATFORM: uint = 0x1031; /* 0x1032 reserved for CL_DEVICE_DOUBLE_FP_CONFIG */ /* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */ -pub static CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: uint = 0x1034; -pub static CL_DEVICE_HOST_UNIFIED_MEMORY: uint = 0x1035; -pub static CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: uint = 0x1036; -pub static CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: uint = 0x1037; -pub static CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: uint = 0x1038; -pub static CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: uint = 0x1039; -pub static CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: uint = 0x103A; -pub static CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: uint = 0x103B; -pub static CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: uint = 0x103C; -pub static CL_DEVICE_OPENCL_C_VERSION: uint = 0x103D; +pub static CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF: uint = 0x1034; +pub static CL_DEVICE_HOST_UNIFIED_MEMORY: uint = 0x1035; +pub static CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR: uint = 0x1036; +pub static CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT: uint = 0x1037; +pub static CL_DEVICE_NATIVE_VECTOR_WIDTH_INT: uint = 0x1038; +pub static CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG: uint = 0x1039; +pub static CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT: uint = 0x103A; +pub static CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE: uint = 0x103B; +pub static CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF: uint = 0x103C; +pub static CL_DEVICE_OPENCL_C_VERSION: uint = 0x103D; /* cl_device_fp_config - bitfield */ -pub static CL_FP_DENORM: bitfield = 1; -pub static CL_FP_INF_NAN: bitfield = 1 << 1; -pub static CL_FP_ROUND_TO_NEAREST: bitfield = 1 << 2; -pub static CL_FP_ROUND_TO_ZERO: bitfield = 1 << 3; -pub static CL_FP_ROUND_TO_INF: bitfield = 1 << 4; -pub static CL_FP_FMA: bitfield = 1 << 5; -pub static CL_FP_SOFT_FLOAT: bitfield = 1 << 6; +pub static CL_FP_DENORM: bitfield = 1; 
+pub static CL_FP_INF_NAN: bitfield = 1 << 1; +pub static CL_FP_ROUND_TO_NEAREST: bitfield = 1 << 2; +pub static CL_FP_ROUND_TO_ZERO: bitfield = 1 << 3; +pub static CL_FP_ROUND_TO_INF: bitfield = 1 << 4; +pub static CL_FP_FMA: bitfield = 1 << 5; +pub static CL_FP_SOFT_FLOAT: bitfield = 1 << 6; /* cl_device_mem_cache_type */ -pub static CL_NONE: uint = 0x0; -pub static CL_READ_ONLY_CACHE: uint = 0x1; -pub static CL_READ_WRITE_CACHE: uint = 0x2; +pub static CL_NONE: uint = 0x0; +pub static CL_READ_ONLY_CACHE: uint = 0x1; +pub static CL_READ_WRITE_CACHE: uint = 0x2; /* cl_device_local_mem_type */ -pub static CL_LOCAL: uint = 0x1; -pub static CL_GLOBAL: uint = 0x2; +pub static CL_LOCAL: uint = 0x1; +pub static CL_GLOBAL: uint = 0x2; /* cl_device_exec_capabilities - bitfield */ -pub static CL_EXEC_KERNEL: bitfield = 1; -pub static CL_EXEC_NATIVE_KERNEL: bitfield = 1 << 1; +pub static CL_EXEC_KERNEL: bitfield = 1; +pub static CL_EXEC_NATIVE_KERNEL: bitfield = 1 << 1; /* cl_command_queue_properties - bitfield */ -pub static CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE: bitfield = 1; -pub static CL_QUEUE_PROFILING_ENABLE: bitfield = 1 << 1; +pub static CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE: bitfield = 1; +pub static CL_QUEUE_PROFILING_ENABLE: bitfield = 1 << 1; /* cl_context_info */ -pub const CL_CONTEXT_REFERENCE_COUNT: uint = 0x1080; -pub const CL_CONTEXT_DEVICES: uint = 0x1081; -pub const CL_CONTEXT_PROPERTIES: uint = 0x1082; -pub const CL_CONTEXT_NUM_DEVICES: uint = 0x1083; +pub const CL_CONTEXT_REFERENCE_COUNT: uint = 0x1080; +pub const CL_CONTEXT_DEVICES: uint = 0x1081; +pub const CL_CONTEXT_PROPERTIES: uint = 0x1082; +pub const CL_CONTEXT_NUM_DEVICES: uint = 0x1083; /* cl_context_info + cl_context_properties */ -pub const CL_CONTEXT_PLATFORM: libc::intptr_t = 0x1084; -pub const CL_CONTEXT_INTEROP_USER_SYNC: libc::intptr_t = 0x1085; +pub const CL_CONTEXT_PLATFORM: libc::intptr_t = 0x1084; +pub const CL_CONTEXT_INTEROP_USER_SYNC: libc::intptr_t = 0x1085; /* cl_command_queue_info */ -pub static CL_QUEUE_CONTEXT: uint = 0x1090; -pub static CL_QUEUE_DEVICE: uint = 0x1091; -pub static CL_QUEUE_REFERENCE_COUNT: uint = 0x1092; -pub static CL_QUEUE_PROPERTIES: uint = 0x1093; +pub static CL_QUEUE_CONTEXT: uint = 0x1090; +pub static CL_QUEUE_DEVICE: uint = 0x1091; +pub static CL_QUEUE_REFERENCE_COUNT: uint = 0x1092; +pub static CL_QUEUE_PROPERTIES: uint = 0x1093; /* cl_mem_flags - bitfield */ -pub static CL_MEM_READ_WRITE: bitfield = 1; -pub static CL_MEM_WRITE_ONLY: bitfield = 1 << 1; -pub static CL_MEM_READ_ONLY: bitfield = 1 << 2; -pub static CL_MEM_USE_HOST_PTR: bitfield = 1 << 3; -pub static CL_MEM_ALLOC_HOST_PTR: bitfield = 1 << 4; -pub static CL_MEM_COPY_HOST_PTR: bitfield = 1 << 5; +pub static CL_MEM_READ_WRITE: bitfield = 1; +pub static CL_MEM_WRITE_ONLY: bitfield = 1 << 1; +pub static CL_MEM_READ_ONLY: bitfield = 1 << 2; +pub static CL_MEM_USE_HOST_PTR: bitfield = 1 << 3; +pub static CL_MEM_ALLOC_HOST_PTR: bitfield = 1 << 4; +pub static CL_MEM_COPY_HOST_PTR: bitfield = 1 << 5; /* cl_channel_order */ -pub static CL_R: uint = 0x10B0; -pub static CL_A: uint = 0x10B1; -pub static CL_RG: uint = 0x10B2; -pub static CL_RA: uint = 0x10B3; -pub static CL_RGB: uint = 0x10B4; -pub static CL_RGBA: uint = 0x10B5; -pub static CL_BGRA: uint = 0x10B6; -pub static CL_ARGB: uint = 0x10B7; -pub static CL_INTENSITY: uint = 0x10B8; -pub static CL_LUMINANCE: uint = 0x10B9; -pub static CL_RX: uint = 0x10BA; -pub static CL_RGX: uint = 0x10BB; -pub static CL_RGBX: uint = 0x10BC; +pub static CL_R: uint = 0x10B0; +pub 
static CL_A: uint = 0x10B1; +pub static CL_RG: uint = 0x10B2; +pub static CL_RA: uint = 0x10B3; +pub static CL_RGB: uint = 0x10B4; +pub static CL_RGBA: uint = 0x10B5; +pub static CL_BGRA: uint = 0x10B6; +pub static CL_ARGB: uint = 0x10B7; +pub static CL_INTENSITY: uint = 0x10B8; +pub static CL_LUMINANCE: uint = 0x10B9; +pub static CL_RX: uint = 0x10BA; +pub static CL_RGX: uint = 0x10BB; +pub static CL_RGBX: uint = 0x10BC; /* cl_channel_type */ -pub static CL_SNORM_INT8: uint = 0x10D0; -pub static CL_SNORM_INT16: uint = 0x10D1; -pub static CL_UNORM_INT8: uint = 0x10D2; -pub static CL_UNORM_INT16: uint = 0x10D3; -pub static CL_UNORM_SHORT_565: uint = 0x10D4; -pub static CL_UNORM_SHORT_555: uint = 0x10D5; -pub static CL_UNORM_INT_101010: uint = 0x10D6; -pub static CL_SIGNED_INT8: uint = 0x10D7; -pub static CL_SIGNED_INT16: uint = 0x10D8; -pub static CL_SIGNED_INT32: uint = 0x10D9; -pub static CL_UNSIGNED_INT8: uint = 0x10DA; -pub static CL_UNSIGNED_INT16: uint = 0x10DB; -pub static CL_UNSIGNED_INT32: uint = 0x10DC; -pub static CL_HALF_FLOAT: uint = 0x10DD; -pub static CL_FLOAT: uint = 0x10DE; +pub static CL_SNORM_INT8: uint = 0x10D0; +pub static CL_SNORM_INT16: uint = 0x10D1; +pub static CL_UNORM_INT8: uint = 0x10D2; +pub static CL_UNORM_INT16: uint = 0x10D3; +pub static CL_UNORM_SHORT_565: uint = 0x10D4; +pub static CL_UNORM_SHORT_555: uint = 0x10D5; +pub static CL_UNORM_INT_101010: uint = 0x10D6; +pub static CL_SIGNED_INT8: uint = 0x10D7; +pub static CL_SIGNED_INT16: uint = 0x10D8; +pub static CL_SIGNED_INT32: uint = 0x10D9; +pub static CL_UNSIGNED_INT8: uint = 0x10DA; +pub static CL_UNSIGNED_INT16: uint = 0x10DB; +pub static CL_UNSIGNED_INT32: uint = 0x10DC; +pub static CL_HALF_FLOAT: uint = 0x10DD; +pub static CL_FLOAT: uint = 0x10DE; /* cl_mem_object_type */ -pub static CL_MEM_OBJECT_BUFFER: uint = 0x10F0; -pub static CL_MEM_OBJECT_IMAGE2D: uint = 0x10F1; -pub static CL_MEM_OBJECT_IMAGE3D: uint = 0x10F2; +pub static CL_MEM_OBJECT_BUFFER: uint = 0x10F0; +pub static CL_MEM_OBJECT_IMAGE2D: uint = 0x10F1; +pub static CL_MEM_OBJECT_IMAGE3D: uint = 0x10F2; /* cl_mem_info */ -pub static CL_MEM_TYPE: uint = 0x1100; -pub static CL_MEM_FLAGS: uint = 0x1101; -pub static CL_MEM_SIZE: uint = 0x1102; -pub static CL_MEM_HOST_PTR: uint = 0x1103; -pub static CL_MEM_MAP_COUNT: uint = 0x1104; -pub static CL_MEM_REFERENCE_COUNT: uint = 0x1105; -pub static CL_MEM_CONTEXT: uint = 0x1106; -pub static CL_MEM_ASSOCIATED_MEMOBJECT: uint = 0x1107; -pub static CL_MEM_OFFSET: uint = 0x1108; +pub static CL_MEM_TYPE: uint = 0x1100; +pub static CL_MEM_FLAGS: uint = 0x1101; +pub static CL_MEM_SIZE: uint = 0x1102; +pub static CL_MEM_HOST_PTR: uint = 0x1103; +pub static CL_MEM_MAP_COUNT: uint = 0x1104; +pub static CL_MEM_REFERENCE_COUNT: uint = 0x1105; +pub static CL_MEM_CONTEXT: uint = 0x1106; +pub static CL_MEM_ASSOCIATED_MEMOBJECT: uint = 0x1107; +pub static CL_MEM_OFFSET: uint = 0x1108; /* cl_image_info */ -pub static CL_IMAGE_FORMAT: uint = 0x1110; -pub static CL_IMAGE_ELEMENT_SIZE: uint = 0x1111; -pub static CL_IMAGE_ROW_PITCH: uint = 0x1112; -pub static CL_IMAGE_SLICE_PITCH: uint = 0x1113; -pub static CL_IMAGE_WIDTH: uint = 0x1114; -pub static CL_IMAGE_HEIGHT: uint = 0x1115; -pub static CL_IMAGE_DEPTH: uint = 0x1116; +pub static CL_IMAGE_FORMAT: uint = 0x1110; +pub static CL_IMAGE_ELEMENT_SIZE: uint = 0x1111; +pub static CL_IMAGE_ROW_PITCH: uint = 0x1112; +pub static CL_IMAGE_SLICE_PITCH: uint = 0x1113; +pub static CL_IMAGE_WIDTH: uint = 0x1114; +pub static CL_IMAGE_HEIGHT: uint = 0x1115; +pub static 
CL_IMAGE_DEPTH: uint = 0x1116; /* cl_addressing_mode */ -pub static CL_ADDRESS_NONE: uint = 0x1130; -pub static CL_ADDRESS_CLAMP_TO_EDGE: uint = 0x1131; -pub static CL_ADDRESS_CLAMP: uint = 0x1132; -pub static CL_ADDRESS_REPEAT: uint = 0x1133; -pub static CL_ADDRESS_MIRRORED_REPEAT: uint = 0x1134; +pub static CL_ADDRESS_NONE: uint = 0x1130; +pub static CL_ADDRESS_CLAMP_TO_EDGE: uint = 0x1131; +pub static CL_ADDRESS_CLAMP: uint = 0x1132; +pub static CL_ADDRESS_REPEAT: uint = 0x1133; +pub static CL_ADDRESS_MIRRORED_REPEAT: uint = 0x1134; /* cl_filter_mode */ -pub static CL_FILTER_NEAREST: uint = 0x1140; -pub static CL_FILTER_LINEAR: uint = 0x1141; +pub static CL_FILTER_NEAREST: uint = 0x1140; +pub static CL_FILTER_LINEAR: uint = 0x1141; /* cl_sampler_info */ -pub static CL_SAMPLER_REFERENCE_COUNT: uint = 0x1150; -pub static CL_SAMPLER_CONTEXT: uint = 0x1151; -pub static CL_SAMPLER_NORMALIZED_COORDS: uint = 0x1152; -pub static CL_SAMPLER_ADDRESSING_MODE: uint = 0x1153; -pub static CL_SAMPLER_FILTER_MODE: uint = 0x1154; +pub static CL_SAMPLER_REFERENCE_COUNT: uint = 0x1150; +pub static CL_SAMPLER_CONTEXT: uint = 0x1151; +pub static CL_SAMPLER_NORMALIZED_COORDS: uint = 0x1152; +pub static CL_SAMPLER_ADDRESSING_MODE: uint = 0x1153; +pub static CL_SAMPLER_FILTER_MODE: uint = 0x1154; /* cl_map_flags - bitfield */ -pub static CL_MAP_READ: bitfield = 1; -pub static CL_MAP_WRITE: bitfield = 1 << 1; +pub static CL_MAP_READ: bitfield = 1; +pub static CL_MAP_WRITE: bitfield = 1 << 1; /* cl_program_info */ -pub static CL_PROGRAM_REFERENCE_COUNT: uint = 0x1160; -pub static CL_PROGRAM_CONTEXT: uint = 0x1161; -pub static CL_PROGRAM_NUM_DEVICES: uint = 0x1162; -pub static CL_PROGRAM_DEVICES: uint = 0x1163; -pub static CL_PROGRAM_SOURCE: uint = 0x1164; -pub static CL_PROGRAM_BINARY_SIZES: uint = 0x1165; -pub static CL_PROGRAM_BINARIES: uint = 0x1166; +pub static CL_PROGRAM_REFERENCE_COUNT: uint = 0x1160; +pub static CL_PROGRAM_CONTEXT: uint = 0x1161; +pub static CL_PROGRAM_NUM_DEVICES: uint = 0x1162; +pub static CL_PROGRAM_DEVICES: uint = 0x1163; +pub static CL_PROGRAM_SOURCE: uint = 0x1164; +pub static CL_PROGRAM_BINARY_SIZES: uint = 0x1165; +pub static CL_PROGRAM_BINARIES: uint = 0x1166; /* cl_program_build_info */ -pub static CL_PROGRAM_BUILD_STATUS: uint = 0x1181; -pub static CL_PROGRAM_BUILD_OPTIONS: uint = 0x1182; -pub static CL_PROGRAM_BUILD_LOG: uint = 0x1183; +pub static CL_PROGRAM_BUILD_STATUS: uint = 0x1181; +pub static CL_PROGRAM_BUILD_OPTIONS: uint = 0x1182; +pub static CL_PROGRAM_BUILD_LOG: uint = 0x1183; /* cl_build_status */ -pub static CL_BUILD_SUCCESS: uint = 0; -pub static CL_BUILD_NONE: uint = (-1isize) as uint; -pub static CL_BUILD_ERROR: uint = -2isize as uint; -pub static CL_BUILD_IN_PROGRESS: uint = -3isize as uint; +pub static CL_BUILD_SUCCESS: uint = 0; +pub static CL_BUILD_NONE: uint = (-1isize) as uint; +pub static CL_BUILD_ERROR: uint = -2isize as uint; +pub static CL_BUILD_IN_PROGRESS: uint = -3isize as uint; /* cl_kernel_info */ -pub static CL_KERNEL_FUNCTION_NAME: uint = 0x1190; -pub static CL_KERNEL_NUM_ARGS: uint = 0x1191; -pub static CL_KERNEL_REFERENCE_COUNT: uint = 0x1192; -pub static CL_KERNEL_CONTEXT: uint = 0x1193; -pub static CL_KERNEL_PROGRAM: uint = 0x1194; +pub static CL_KERNEL_FUNCTION_NAME: uint = 0x1190; +pub static CL_KERNEL_NUM_ARGS: uint = 0x1191; +pub static CL_KERNEL_REFERENCE_COUNT: uint = 0x1192; +pub static CL_KERNEL_CONTEXT: uint = 0x1193; +pub static CL_KERNEL_PROGRAM: uint = 0x1194; /* cl_kernel_work_group_info */ -pub static CL_KERNEL_WORK_GROUP_SIZE: 
uint = 0x11B0; -pub static CL_KERNEL_COMPILE_WORK_GROUP_SIZE: uint = 0x11B1; -pub static CL_KERNEL_LOCAL_MEM_SIZE: uint = 0x11B2; +pub static CL_KERNEL_WORK_GROUP_SIZE: uint = 0x11B0; +pub static CL_KERNEL_COMPILE_WORK_GROUP_SIZE: uint = 0x11B1; +pub static CL_KERNEL_LOCAL_MEM_SIZE: uint = 0x11B2; pub static CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: uint = 0x11B3; -pub static CL_KERNEL_PRIVATE_MEM_SIZE: uint = 0x11B4; +pub static CL_KERNEL_PRIVATE_MEM_SIZE: uint = 0x11B4; /* cl_event_info */ -pub static CL_EVENT_COMMAND_QUEUE: uint = 0x11D0; -pub static CL_EVENT_COMMAND_TYPE: uint = 0x11D1; -pub static CL_EVENT_REFERENCE_COUNT: uint = 0x11D2; -pub static CL_EVENT_COMMAND_EXECUTION_STATUS: uint = 0x11D3; -pub static CL_EVENT_CONTEXT: uint = 0x11D4; +pub static CL_EVENT_COMMAND_QUEUE: uint = 0x11D0; +pub static CL_EVENT_COMMAND_TYPE: uint = 0x11D1; +pub static CL_EVENT_REFERENCE_COUNT: uint = 0x11D2; +pub static CL_EVENT_COMMAND_EXECUTION_STATUS: uint = 0x11D3; +pub static CL_EVENT_CONTEXT: uint = 0x11D4; /* cl_command_type */ -pub static CL_COMMAND_NDRANGE_KERNEL: uint = 0x11F0; -pub static CL_COMMAND_TASK: uint = 0x11F1; -pub static CL_COMMAND_NATIVE_KERNEL: uint = 0x11F2; -pub static CL_COMMAND_READ_BUFFER: uint = 0x11F3; -pub static CL_COMMAND_WRITE_BUFFER: uint = 0x11F4; -pub static CL_COMMAND_COPY_BUFFER: uint = 0x11F5; -pub static CL_COMMAND_READ_IMAGE: uint = 0x11F6; -pub static CL_COMMAND_WRITE_IMAGE: uint = 0x11F7; -pub static CL_COMMAND_COPY_IMAGE: uint = 0x11F8; -pub static CL_COMMAND_COPY_IMAGE_TO_BUFFER: uint = 0x11F9; -pub static CL_COMMAND_COPY_BUFFER_TO_IMAGE: uint = 0x11FA; -pub static CL_COMMAND_MAP_BUFFER: uint = 0x11FB; -pub static CL_COMMAND_MAP_IMAGE: uint = 0x11FC; -pub static CL_COMMAND_UNMAP_MEM_OBJECT: uint = 0x11FD; -pub static CL_COMMAND_MARKER: uint = 0x11FE; -pub static CL_COMMAND_ACQUIRE_GL_OBJECTS: uint = 0x11FF; -pub static CL_COMMAND_RELEASE_GL_OBJECTS: uint = 0x1200; -pub static CL_COMMAND_READ_BUFFER_RECT: uint = 0x1201; -pub static CL_COMMAND_WRITE_BUFFER_RECT: uint = 0x1202; -pub static CL_COMMAND_COPY_BUFFER_RECT: uint = 0x1203; -pub static CL_COMMAND_USER: uint = 0x1204; +pub static CL_COMMAND_NDRANGE_KERNEL: uint = 0x11F0; +pub static CL_COMMAND_TASK: uint = 0x11F1; +pub static CL_COMMAND_NATIVE_KERNEL: uint = 0x11F2; +pub static CL_COMMAND_READ_BUFFER: uint = 0x11F3; +pub static CL_COMMAND_WRITE_BUFFER: uint = 0x11F4; +pub static CL_COMMAND_COPY_BUFFER: uint = 0x11F5; +pub static CL_COMMAND_READ_IMAGE: uint = 0x11F6; +pub static CL_COMMAND_WRITE_IMAGE: uint = 0x11F7; +pub static CL_COMMAND_COPY_IMAGE: uint = 0x11F8; +pub static CL_COMMAND_COPY_IMAGE_TO_BUFFER: uint = 0x11F9; +pub static CL_COMMAND_COPY_BUFFER_TO_IMAGE: uint = 0x11FA; +pub static CL_COMMAND_MAP_BUFFER: uint = 0x11FB; +pub static CL_COMMAND_MAP_IMAGE: uint = 0x11FC; +pub static CL_COMMAND_UNMAP_MEM_OBJECT: uint = 0x11FD; +pub static CL_COMMAND_MARKER: uint = 0x11FE; +pub static CL_COMMAND_ACQUIRE_GL_OBJECTS: uint = 0x11FF; +pub static CL_COMMAND_RELEASE_GL_OBJECTS: uint = 0x1200; +pub static CL_COMMAND_READ_BUFFER_RECT: uint = 0x1201; +pub static CL_COMMAND_WRITE_BUFFER_RECT: uint = 0x1202; +pub static CL_COMMAND_COPY_BUFFER_RECT: uint = 0x1203; +pub static CL_COMMAND_USER: uint = 0x1204; /* command execution status */ -pub static CL_COMPLETE: uint = 0x0; -pub static CL_RUNNING: uint = 0x1; -pub static CL_SUBMITTED: uint = 0x2; -pub static CL_QUEUED: uint = 0x3; +pub static CL_COMPLETE: uint = 0x0; +pub static CL_RUNNING: uint = 0x1; +pub static CL_SUBMITTED: uint = 0x2; +pub 
static CL_QUEUED: uint = 0x3; /* cl_buffer_create_type */ -pub static CL_BUFFER_CREATE_TYPE_REGION: uint = 0x1220; +pub static CL_BUFFER_CREATE_TYPE_REGION: uint = 0x1220; /* cl_profiling_info */ -pub static CL_PROFILING_COMMAND_QUEUED: uint = 0x1280; -pub static CL_PROFILING_COMMAND_SUBMIT: uint = 0x1281; -pub static CL_PROFILING_COMMAND_START: uint = 0x1282; -pub static CL_PROFILING_COMMAND_END: uint = 0x1283; +pub static CL_PROFILING_COMMAND_QUEUED: uint = 0x1280; +pub static CL_PROFILING_COMMAND_SUBMIT: uint = 0x1281; +pub static CL_PROFILING_COMMAND_START: uint = 0x1282; +pub static CL_PROFILING_COMMAND_END: uint = 0x1283; diff --git a/coaster/src/frameworks/opencl/context.rs b/coaster/src/frameworks/opencl/context.rs index 1c6a827c8..6f034881f 100644 --- a/coaster/src/frameworks/opencl/context.rs +++ b/coaster/src/frameworks/opencl/context.rs @@ -1,24 +1,24 @@ //! Provides a Rust wrapper around OpenCL's context. -use device::{IDevice, MemorySync}; -use device::Error as DeviceError; use super::api::types as cl; -use super::{API, Error, Device, Queue, Platform}; use super::memory::*; -#[cfg(feature = "native")] -use frameworks::native::flatbox::FlatBox; +use super::{Device, Error, Platform, Queue, API}; +use device::Error as DeviceError; +use device::{IDevice, MemorySync}; #[cfg(feature = "native")] use frameworks::native::device::Cpu; +#[cfg(feature = "native")] +use frameworks::native::flatbox::FlatBox; use std::any::Any; -use std::{ptr, mem}; use std::hash::{Hash, Hasher}; +use std::{mem, ptr}; #[derive(Debug, Clone)] /// Defines a OpenCL Context. pub struct Context { id: isize, devices: Vec, - queue: Option + queue: Option, } /// The individual properties for `ContextInfo::Properties` @@ -40,7 +40,7 @@ pub enum ContextInfoQuery { /// The properties the context was configured with. Properties, /// The devices (IDs) in the context. - Devices + Devices, } /// OpenCL context info types. Each variant is returned from the same function, @@ -67,7 +67,7 @@ pub enum ContextInfo { /// depending on CL extensions. Properties(Vec), /// The devices (IDs) in the context. - Devices(Vec) + Devices(Vec), } impl Context { @@ -75,8 +75,9 @@ impl Context { pub fn new(devices: Vec) -> Result { let callback = unsafe { mem::transmute(ptr::null::()) }; let mut context = Context::from_c( - API::create_context(devices.clone(), ptr::null(), callback, ptr::null_mut())?, - devices.clone()); + API::create_context(devices.clone(), ptr::null(), callback, ptr::null_mut())?, + devices.clone(), + ); // initialize queue context.queue_mut(); Ok(context) @@ -84,7 +85,11 @@ impl Context { /// Initializes a new OpenCL platform from its C type. pub fn from_c(id: cl::context_id, devices: Vec) -> Context { - Context { id: id as isize, devices: devices, queue: None } + Context { + id: id as isize, + devices: devices, + queue: None, + } } /// Returns Queue for first device. @@ -103,7 +108,7 @@ impl Context { } /// Get certain parameters of the context, defined by `ContextInfoQuery`. 
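// Illustrative only, not applied by this patch: a minimal sketch of the
// query API below; `devices` and the `NumDevices` variant name are
// assumptions inferred from the surrounding diff and the tests further down.
//
//     let ctx = Context::new(devices)?;
//     match ctx.get_context_info(ContextInfoQuery::NumDevices)? {
//         ContextInfo::NumDevices(n) => println!("devices in context: {:?}", n),
//         other => println!("other info: {:?}", other),
//     }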
- pub fn get_context_info(&self, query : ContextInfoQuery) -> Result { + pub fn get_context_info(&self, query: ContextInfoQuery) -> Result { API::get_context_info(self.id as cl::context_id, query) } @@ -136,30 +141,50 @@ impl IDevice for Context { } impl MemorySync for Context { - fn sync_in(&self, my_memory: &mut Any, src_device: &Any, src_memory: &Any) - -> Result<(), DeviceError> { + fn sync_in( + &self, + my_memory: &mut Any, + src_device: &Any, + src_memory: &Any, + ) -> Result<(), DeviceError> { if let Some(_) = src_device.downcast_ref::() { let mut my_mem = my_memory.downcast_mut::().unwrap(); let src_mem = src_memory.downcast_ref::().unwrap(); API::write_to_memory( - self.queue().unwrap(), my_mem, true, 0, - src_mem.byte_size(), src_mem.as_slice().as_ptr(), &[])?; + self.queue().unwrap(), + my_mem, + true, + 0, + src_mem.byte_size(), + src_mem.as_slice().as_ptr(), + &[], + )?; Ok(()) } else { Err(DeviceError::NoMemorySyncRoute) } } - fn sync_out(&self, my_memory: &Any, dst_device: &Any, dst_memory: &mut Any) - -> Result<(), DeviceError> { + fn sync_out( + &self, + my_memory: &Any, + dst_device: &Any, + dst_memory: &mut Any, + ) -> Result<(), DeviceError> { if let Some(_) = dst_device.downcast_ref::() { let my_mem = my_memory.downcast_ref::().unwrap(); let mut dst_mem = dst_memory.downcast_mut::().unwrap(); API::read_from_memory( - self.queue().unwrap(), my_mem, true, 0, - dst_mem.byte_size(), dst_mem.as_mut_slice().as_mut_ptr(), &[])?; + self.queue().unwrap(), + my_mem, + true, + 0, + dst_mem.byte_size(), + dst_mem.as_mut_slice().as_mut_ptr(), + &[], + )?; Ok(()) } else { Err(DeviceError::NoMemorySyncRoute) @@ -167,7 +192,6 @@ impl MemorySync for Context { } } - impl PartialEq for Context { fn eq(&self, other: &Self) -> bool { self.id() == other.id() diff --git a/coaster/src/frameworks/opencl/device.rs b/coaster/src/frameworks/opencl/device.rs index d3ed02a78..7f9d60955 100644 --- a/coaster/src/frameworks/opencl/device.rs +++ b/coaster/src/frameworks/opencl/device.rs @@ -1,10 +1,10 @@ //! Provides a Rust wrapper around OpenCL's device. -use hardware::{IHardware, HardwareType}; use super::api::types as cl; use super::api::API; -use std::io::Cursor; use byteorder::{ByteOrder, LittleEndian, ReadBytesExt}; +use hardware::{HardwareType, IHardware}; +use std::io::Cursor; use regex::Regex; use std::cmp::Ordering; @@ -43,7 +43,6 @@ impl Ord for Version { } } - impl PartialOrd for Version { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) @@ -51,7 +50,7 @@ impl PartialOrd for Version { } impl Version { - fn new(major : usize, minor : usize, ext : Option) -> Version { + fn new(major: usize, minor: usize, ext: Option) -> Version { Version { major, minor, ext } } } @@ -86,12 +85,18 @@ impl Default for Device { impl Device { /// Initializes a new OpenCL device. pub fn from_isize(id: isize) -> Device { - Device { id: id, ..Device::default() } + Device { + id: id, + ..Device::default() + } } /// Initializes a new OpenCL device from its C type. pub fn from_c(id: cl::device_id) -> Device { - Device { id: id as isize, ..Device::default() } + Device { + id: id as isize, + ..Device::default() + } } /// Returns the id as its C type. 
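// Illustrative only, not applied by this patch: assuming the `Ord` impl
// above compares `Version` by (major, minor) lexicographically, the
// reformatted constructor behaves like:
//
//     assert!(Version::new(1, 2, None) < Version::new(2, 0, None));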
@@ -103,7 +108,7 @@ impl Device { pub fn load_name(&mut self) -> Self { self.name = match API::load_device_info(self, cl::CL_DEVICE_NAME) { Ok(result) => Some(result.to_string()), - Err(_) => None + Err(_) => None, }; self.clone() } @@ -119,10 +124,10 @@ impl Device { cl::CL_DEVICE_TYPE_ACCELERATOR => Some(HardwareType::ACCELERATOR), cl::CL_DEVICE_TYPE_DEFAULT => Some(HardwareType::OTHER), cl::CL_DEVICE_TYPE_CUSTOM => Some(HardwareType::OTHER), - _ => None + _ => None, } - }, - Err(_) => None + } + Err(_) => None, }; self.clone() } @@ -131,7 +136,7 @@ impl Device { pub fn load_compute_units(&mut self) -> Self { self.compute_units = match API::load_device_info(self, cl::CL_DEVICE_MAX_COMPUTE_UNITS) { Ok(result) => Some(result.to_isize()), - Err(_) => None + Err(_) => None, }; self.clone() } @@ -140,7 +145,7 @@ impl Device { pub fn load_version(&mut self) -> Self { self.version = match API::load_device_info(self, cl::CL_DEVICE_VERSION) { Ok(result) => Some(result.to_version()), - Err(_) => None + Err(_) => None, }; self.clone() } @@ -149,7 +154,7 @@ impl Device { pub fn load_vendor(&mut self) -> Self { self.vendor = match API::load_device_info(self, cl::CL_DEVICE_VENDOR) { Ok(result) => Some(result.to_string()), - Err(_) => None + Err(_) => None, }; self.clone() } @@ -209,7 +214,6 @@ pub struct DeviceInfo { } impl DeviceInfo { - /// Initializes a new Device Info pub fn new(info: Vec) -> DeviceInfo { DeviceInfo { info: info } @@ -235,15 +239,17 @@ impl DeviceInfo { #[allow(missing_docs)] pub fn to_version(self) -> Version { lazy_static! { - static ref VERSION_RE: Regex = Regex::new(r"OpenCL\s([0-9])+\.([0-9]*)\s([[:print:]])").unwrap(); + static ref VERSION_RE: Regex = + Regex::new(r"OpenCL\s([0-9])+\.([0-9]*)\s([[:print:]])").unwrap(); } let version_string = unsafe { String::from_utf8_unchecked(self.info) }; for cap in VERSION_RE.captures_iter(version_string.as_str()) { - return Version::new(cap[1].to_string().parse::().unwrap(), - cap[2].to_string().parse::().unwrap(), - Some(cap[3].to_string())) + return Version::new( + cap[1].to_string().parse::().unwrap(), + cap[2].to_string().parse::().unwrap(), + Some(cap[3].to_string()), + ); } - Version::new(0,0,None) + Version::new(0, 0, None) } - } diff --git a/coaster/src/frameworks/opencl/kernel.rs b/coaster/src/frameworks/opencl/kernel.rs index 2e3dedb0b..09f2a74aa 100644 --- a/coaster/src/frameworks/opencl/kernel.rs +++ b/coaster/src/frameworks/opencl/kernel.rs @@ -1,7 +1,7 @@ //! Provides a Rust wrapper around OpenCL's Kernel. -use operation::IOperation; use super::api::types as cl; +use operation::IOperation; #[derive(Debug, Copy, Clone)] /// Defines a OpenCL Kernel. diff --git a/coaster/src/frameworks/opencl/memory.rs b/coaster/src/frameworks/opencl/memory.rs index ec41c3bbf..412e1e3c9 100644 --- a/coaster/src/frameworks/opencl/memory.rs +++ b/coaster/src/frameworks/opencl/memory.rs @@ -1,10 +1,10 @@ #![allow(missing_docs)] use super::api::types as cl; -use super::api::{API, Error}; +use super::api::{Error, API}; use super::Context; use device::IMemory; -use std::{ptr, fmt}; +use std::{fmt, ptr}; /// Holds a OpenCL memory id and manages its deallocation pub struct Memory { diff --git a/coaster/src/frameworks/opencl/mod.rs b/coaster/src/frameworks/opencl/mod.rs index 47d83824e..9f1b96c3c 100644 --- a/coaster/src/frameworks/opencl/mod.rs +++ b/coaster/src/frameworks/opencl/mod.rs @@ -7,27 +7,27 @@ //! OpenCL device -> Hardware //! 
OpenCL context -> Device -use backend::{Backend, IBackend}; -use framework::IFramework; -pub use self::platform::Platform; +pub use self::api::{Error, API}; pub use self::context::Context; -pub use self::memory::{Memory, MemoryFlags}; -pub use self::queue::{Queue, QueueFlags}; +pub use self::device::{Device, DeviceInfo}; pub use self::event::Event; pub use self::kernel::Kernel; +pub use self::memory::{Memory, MemoryFlags}; +pub use self::platform::Platform; pub use self::program::Program; -pub use self::device::{Device, DeviceInfo}; -pub use self::api::{API, Error}; +pub use self::queue::{Queue, QueueFlags}; +use backend::{Backend, IBackend}; +use framework::IFramework; -pub mod device; -pub mod platform; +mod api; pub mod context; -pub mod memory; -pub mod queue; +pub mod device; pub mod event; pub mod kernel; +pub mod memory; +pub mod platform; pub mod program; -mod api; +pub mod queue; #[derive(Debug, Clone)] /// Provides the OpenCL Framework. @@ -48,20 +48,22 @@ impl IFramework for OpenCL { type D = Context; type B = Program; - fn ID() -> &'static str { "OPENCL" } + fn ID() -> &'static str { + "OPENCL" + } fn new() -> OpenCL { let hardwares = OpenCL::load_hardwares().expect("Acquiring hw never fails. qed"); Self { hardwares, - binary: Program::from_isize(1) + binary: Program::from_isize(1), } } fn load_hardwares() -> Result, ::framework::Error> { let platforms = API::load_platforms()?; - let mut hardware_container: Vec = vec!(); + let mut hardware_container: Vec = vec![]; for platform in &platforms { if let Ok(hardwares) = API::load_devices(platform) { hardware_container.append(&mut hardwares.clone()) diff --git a/coaster/src/frameworks/opencl/program.rs b/coaster/src/frameworks/opencl/program.rs index 37242022e..9be294d73 100644 --- a/coaster/src/frameworks/opencl/program.rs +++ b/coaster/src/frameworks/opencl/program.rs @@ -1,7 +1,7 @@ //! Provides a Rust wrapper around OpenCL's Program. -use binary::IBinary; use super::api::types as cl; +use binary::IBinary; #[derive(Debug, Copy, Clone)] /// Defines a OpenCL Program. @@ -15,16 +15,12 @@ pub struct Program { impl Program { /// Initializes a new OpenCL device. pub fn from_isize(id: isize) -> Program { - Program { - id: id, - } + Program { id: id } } /// Initializes a new OpenCL device from its C type. pub fn from_c(id: cl::kernel_id) -> Program { - Program { - id: id as isize, - } + Program { id: id as isize } } /// Returns the id as its C type. diff --git a/coaster/src/frameworks/opencl/queue.rs b/coaster/src/frameworks/opencl/queue.rs index 0e014b79a..f2765ff15 100644 --- a/coaster/src/frameworks/opencl/queue.rs +++ b/coaster/src/frameworks/opencl/queue.rs @@ -11,7 +11,7 @@ //! synchronization. use super::api::types as cl; -use super::api::{API, Error}; +use super::api::{Error, API}; use super::Context; use super::Device; use super::Event; @@ -28,7 +28,11 @@ impl Queue { /// Create a new command queue for the provided `context` and `device`. /// /// If no `queue_flags` are provided, the defaults are used. 
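// Illustrative only, not applied by this patch: with the reformatted
// constructor below, the default-flags case stays a one-liner; `context`
// and `device` are assumed to be created elsewhere.
//
//     let queue = Queue::new(&context, &device, None)?;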
- pub fn new(context: &Context, device: &Device, queue_flags: Option<&QueueFlags>) -> Result { + pub fn new( + context: &Context, + device: &Device, + queue_flags: Option<&QueueFlags>, + ) -> Result { let default_flags = QueueFlags::default(); let flags = queue_flags.unwrap_or(&default_flags); API::create_queue(context, device, flags) @@ -62,9 +66,17 @@ impl Queue { global_work_offset: usize, global_work_size: usize, local_work_size: usize, - event_wait_list: &[Event] + event_wait_list: &[Event], ) -> Result { - API::enqueue_kernel(self, kernel, work_dim, global_work_offset, global_work_size, local_work_size, event_wait_list) + API::enqueue_kernel( + self, + kernel, + work_dim, + global_work_offset, + global_work_size, + local_work_size, + event_wait_list, + ) } } diff --git a/coaster/src/hardware.rs b/coaster/src/hardware.rs index d53895569..2cbe9c379 100644 --- a/coaster/src/hardware.rs +++ b/coaster/src/hardware.rs @@ -17,7 +17,7 @@ pub enum HardwareType { /// Hardware Accelerator devices ACCELERATOR, /// Used for anything else - OTHER + OTHER, } /// Specifies Hardware behavior accross frameworks. diff --git a/coaster/src/lib.rs b/coaster/src/lib.rs index 582ff9640..ea1381f80 100644 --- a/coaster/src/lib.rs +++ b/coaster/src/lib.rs @@ -141,46 +141,45 @@ clippy::unused_qualifications, clippy::complexity )] - #![cfg_attr(feature = "unstable_alloc", feature(alloc))] #[cfg(feature = "unstable_alloc")] extern crate alloc; -extern crate libc; extern crate bitflags; extern crate enum_primitive; extern crate lazy_static; +extern crate libc; +extern crate byteorder; +extern crate num; #[cfg(feature = "opencl")] extern crate regex; -extern crate num; -extern crate byteorder; pub mod backend; +pub mod binary; pub mod device; -pub mod hardware; +pub mod error; pub mod framework; pub mod frameworks; -pub mod tensor; +pub mod hardware; pub mod operation; -pub mod binary; -pub mod error; pub mod plugin; +pub mod tensor; // These will be exported with the prelude. 
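// Illustrative only, not applied by this patch: downstream code picks up
// the re-exports below in a single line:
//
//     use coaster::prelude::*;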
pub use crate::backend::*; pub use crate::device::{IDevice, IMemory}; -pub use crate::hardware::{IHardware, HardwareType}; pub use crate::framework::IFramework; -pub use crate::tensor::{SharedTensor, TensorDesc, ITensorDesc, IntoTensorDesc}; +#[cfg(feature = "cuda")] +pub use crate::frameworks::Cuda; #[cfg(feature = "native")] pub use crate::frameworks::Native; +pub use crate::hardware::{HardwareType, IHardware}; +pub use crate::tensor::{ITensorDesc, IntoTensorDesc, SharedTensor, TensorDesc}; #[cfg(feature = "cuda")] -pub use crate::frameworks::Cuda; +extern crate rcublas as cublas; #[cfg(feature = "cuda")] extern crate rcudnn as cudnn; -#[cfg(feature = "cuda")] -extern crate rcublas as cublas; #[cfg(feature = "opencl")] pub use frameworks::OpenCL; @@ -204,14 +203,14 @@ pub use crate::error::Error; pub mod prelude { pub use crate::backend::*; pub use crate::device::{IDevice, IMemory}; - pub use crate::hardware::{IHardware, HardwareType}; pub use crate::framework::IFramework; pub use crate::frameworks::native::flatbox::FlatBox; - pub use crate::tensor::{SharedTensor, TensorDesc, ITensorDesc, IntoTensorDesc}; - #[cfg(feature = "native")] - pub use crate::frameworks::Native; #[cfg(feature = "cuda")] pub use crate::frameworks::Cuda; + #[cfg(feature = "native")] + pub use crate::frameworks::Native; + pub use crate::hardware::{HardwareType, IHardware}; + pub use crate::tensor::{ITensorDesc, IntoTensorDesc, SharedTensor, TensorDesc}; #[cfg(feature = "opencl")] pub use frameworks::OpenCL; } diff --git a/coaster/src/tensor.rs b/coaster/src/tensor.rs index 26dac93ae..8610dea05 100644 --- a/coaster/src/tensor.rs +++ b/coaster/src/tensor.rs @@ -47,14 +47,14 @@ //! # } //! ``` -use crate::device::{IDevice, MemorySync}; use crate::device::Error as DeviceError; +use crate::device::{IDevice, MemorySync}; use std::any::Any; use std::cell::{Cell, RefCell}; use std::marker::PhantomData; -use std::{fmt, mem, error}; use std::ops::Deref; +use std::{error, fmt, mem}; /// Describes the Descriptor of a SharedTensor. pub type TensorDesc = Vec; @@ -131,7 +131,7 @@ pub trait ITensorDesc { 1 => { strides.push(1); strides - }, + } _ => { let imp_dims = &self.dims()[1..dim_length]; for (i, _) in imp_dims.iter().enumerate() { @@ -252,7 +252,7 @@ impl ITensorDesc for TensorDesc { fn size(&self) -> usize { match self.rank() { 0 => 1, - _ => self.iter().product() + _ => self.iter().product(), } } @@ -265,8 +265,7 @@ impl ITensorDesc for TensorDesc { } } - -impl fmt::Debug for SharedTensor { +impl fmt::Debug for SharedTensor { fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { write!(f, "SharedTensor desc={:?}", self.desc) } @@ -294,7 +293,9 @@ impl SharedTensor { self.desc = new_desc; Ok(()) } else { - Err(Error::InvalidShape("Size of the provided shape is not equal to the old shape.")) + Err(Error::InvalidShape( + "Size of the provided shape is not equal to the old shape.", + )) } } @@ -323,8 +324,7 @@ impl SharedTensor { /// Looks up `device` in self.locations and returns its index. If lookup /// fails then new location is created and its index is returned. - fn get_or_create_location_index(&self, device: &D) - -> Result { + fn get_or_create_location_index(&self, device: &D) -> Result { if let Some(i) = self.get_location_index(device) { return Ok(i); } @@ -376,36 +376,49 @@ impl SharedTensor { // about CUDA at all. So if first attempt fails we change order and // try again. 
match src_loc.mem_transfer.sync_out( - src_loc.mem.deref(), dst_loc.device.deref(), dst_loc.mem.as_mut()) { - Err(DeviceError::NoMemorySyncRoute) => {}, + src_loc.mem.deref(), + dst_loc.device.deref(), + dst_loc.mem.as_mut(), + ) { + Err(DeviceError::NoMemorySyncRoute) => {} x => return x.map_err(|e| e.into()), } match dst_loc.mem_transfer.sync_in( - dst_loc.mem.as_mut(), src_loc.device.deref(), src_loc.mem.deref()) { - Err(DeviceError::NoMemorySyncRoute) => {}, + dst_loc.mem.as_mut(), + src_loc.device.deref(), + src_loc.mem.deref(), + ) { + Err(DeviceError::NoMemorySyncRoute) => {} x => return x.map_err(|e| e.into()), } - // If there is no direct path, we take the detour via native - // and do an indirect transfer. - if cfg!(feature = "native") { - use crate::framework::IFramework; - use crate::frameworks::native::Native; - let native_framework = Native::new(); - let native_device = native_framework.new_device(native_framework.hardwares()).unwrap(); // FIXME - let mut native_mem = native_device.alloc_memory(self.desc.size()).unwrap(); // FIXME calculate size - match src_loc.mem_transfer.sync_out( - src_loc.mem.deref(), &native_device, &mut native_mem) { - Err(DeviceError::NoMemorySyncRoute) => {}, - x => return x.map_err(|e| e.into()), - } - match dst_loc.mem_transfer.sync_in( - dst_loc.mem.as_mut(), &native_device, &native_mem) { - Err(DeviceError::NoMemorySyncRoute) => {}, - x => return x.map_err(|e| e.into()), - } - Ok(()) + // If there is no direct path, we take the detour via native + // and do an indirect transfer. + if cfg!(feature = "native") { + use crate::framework::IFramework; + use crate::frameworks::native::Native; + let native_framework = Native::new(); + let native_device = native_framework + .new_device(native_framework.hardwares()) + .unwrap(); // FIXME + let mut native_mem = native_device.alloc_memory(self.desc.size()).unwrap(); // FIXME calculate size + match src_loc.mem_transfer.sync_out( + src_loc.mem.deref(), + &native_device, + &mut native_mem, + ) { + Err(DeviceError::NoMemorySyncRoute) => {} + x => return x.map_err(|e| e.into()), + } + match dst_loc + .mem_transfer + .sync_in(dst_loc.mem.as_mut(), &native_device, &native_mem) + { + Err(DeviceError::NoMemorySyncRoute) => {} + x => return x.map_err(|e| e.into()), + } + Ok(()) } else { Err(DeviceError::NoMemorySyncRoute.into()) } @@ -431,7 +444,10 @@ impl SharedTensor { self.up_to_date.set(self.up_to_date.get() | (1 << i)); let locs = self.locations.borrow(); - let mem: &D::M = &locs[i].mem.deref().downcast_ref() + let mem: &D::M = &locs[i] + .mem + .deref() + .downcast_ref() .expect("Broken invariant: wrong memory type"); let mem_a: &'a D::M = unsafe { ::std::mem::transmute(mem) }; Ok(mem_a) @@ -439,8 +455,7 @@ impl SharedTensor { /// Get memory for reading and writing on the specified `device`. /// Can fail if memory allocation fails, or if tensor wasn't initialized yet. 
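// Illustrative only, not applied by this patch: the `up_to_date` bitmask
// tracks which device copies are valid. `read` above ORs in the device's
// bit, while `read_write` below makes it the only set bit; `cpu` is an
// assumed native device.
//
//     let shared = tensor.read(&cpu)?;      // other copies stay valid
//     let excl = tensor.read_write(&cpu)?;  // other copies invalidated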
- pub fn read_write<'a, D: IDevice>(&'a mut self, device: &D) - -> Result<&'a mut D::M, Error> { + pub fn read_write<'a, D: IDevice>(&'a mut self, device: &D) -> Result<&'a mut D::M, Error> { if self.up_to_date.get() == 0 { return Err(Error::UninitializedMemory); } @@ -449,7 +464,10 @@ impl SharedTensor { self.up_to_date.set(1 << i); let mut locs = self.locations.borrow_mut(); - let mem: &mut D::M = &mut locs[i].mem.as_mut().downcast_mut() + let mem: &mut D::M = &mut locs[i] + .mem + .as_mut() + .downcast_mut() .expect("Broken invariant: wrong memory type"); let mem_a: &'a mut D::M = unsafe { ::std::mem::transmute(mem) }; Ok(mem_a) @@ -462,13 +480,15 @@ impl SharedTensor { /// uninitialized data later. If caller has failed to overwrite memory, /// for some reason, it must call `invalidate()` to return vector to /// uninitialized state. - pub fn write_only<'a, D: IDevice>(&'a mut self, device: &D) - -> Result<&'a mut D::M, Error> { + pub fn write_only<'a, D: IDevice>(&'a mut self, device: &D) -> Result<&'a mut D::M, Error> { let i = self.get_or_create_location_index(device)?; self.up_to_date.set(1 << i); let mut locs = self.locations.borrow_mut(); - let mem: &mut D::M = &mut locs[i].mem.as_mut().downcast_mut() + let mem: &mut D::M = &mut locs[i] + .mem + .as_mut() + .downcast_mut() .expect("Broken invariant: wrong memory type"); let mem_a: &'a mut D::M = unsafe { ::std::mem::transmute(mem) }; Ok(mem_a) @@ -488,14 +508,15 @@ impl SharedTensor { let upper = (up_to_date >> 1) & (!mask); self.up_to_date.set(lower | upper); Ok(()) - }, - None => - Err(Error::InvalidRemove("Memory isn't allocated on this device")) + } + None => Err(Error::InvalidRemove( + "Memory isn't allocated on this device", + )), } } - // force synchronize initialized memory to a device - /// Allocates an already filled memory block on a device. - /// This is a special needs function for performance concerns and should be avoided where possible. + // force synchronize initialized memory to a device + /// Allocates an already filled memory block on a device. + /// This is a special needs function for performance concerns and should be avoided where possible. 
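// Illustrative only, not applied by this patch: `drop` above removes a
// single device copy and compacts the bitmask, so a later `read` on that
// device must resynchronize, or fail with `UninitializedMemory` if no
// valid copy remains, as the tests further down exercise:
//
//     tensor.drop(&cpu)?; // deallocate the CPU copy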
fn sync(&mut self, device: &D) -> Result<(), Error> { if self.up_to_date.get() == 0 { return Err(Error::UninitializedMemory); @@ -546,7 +567,7 @@ impl fmt::Display for Error { Error::InvalidRemove(e) => (*e).to_string(), Error::InvalidShape(e) => (*e).to_string(), Error::CapacityExceeded => "CapacityExceeded".to_string(), - Error::UninitializedMemory => "UninitializedMemory".to_string() + Error::UninitializedMemory => "UninitializedMemory".to_string(), }; write!(f, "{}", msg) } diff --git a/coaster/tests/backend_specs.rs b/coaster/tests/backend_specs.rs index 95d6ea42b..67f5673c6 100644 --- a/coaster/tests/backend_specs.rs +++ b/coaster/tests/backend_specs.rs @@ -5,8 +5,8 @@ extern crate libc; mod backend_spec { #[cfg(feature = "native")] mod native { - use std::rc::Rc; use crate::co::prelude::*; + use std::rc::Rc; #[test] fn it_can_create_default_backend() { @@ -23,7 +23,7 @@ mod backend_spec { } fn use_ibackend(backend: Rc) { - let backend: Rc> = backend.clone(); + let backend: Rc> = backend.clone(); backend.device(); } } diff --git a/coaster/tests/compiletests.rs b/coaster/tests/compiletests.rs index 0a51ff941..0c883f5ac 100644 --- a/coaster/tests/compiletests.rs +++ b/coaster/tests/compiletests.rs @@ -7,8 +7,7 @@ fn run_mode(mode: &'static str) { let mut config = compiletest::default_config(); let cfg_mode = mode.parse().ok().expect("Invalid mode"); - config.target_rustcflags = Some("-L target/debug/ -L target/debug/deps/" - .to_owned()); + config.target_rustcflags = Some("-L target/debug/ -L target/debug/deps/".to_owned()); config.mode = cfg_mode; config.src_base = PathBuf::from(format!("tests/{}", mode)); diff --git a/coaster/tests/framework_cuda_specs.rs b/coaster/tests/framework_cuda_specs.rs index 8a302d777..7b85f2eaf 100644 --- a/coaster/tests/framework_cuda_specs.rs +++ b/coaster/tests/framework_cuda_specs.rs @@ -4,8 +4,8 @@ extern crate libc; #[cfg(test)] #[cfg(feature = "cuda")] mod framework_cuda_spec { - use crate::co::prelude::*; use crate::co::frameworks::cuda::memory::*; + use crate::co::prelude::*; #[test] fn it_works() { diff --git a/coaster/tests/framework_opencl_specs.rs b/coaster/tests/framework_opencl_specs.rs index baa678691..d6ab4831d 100644 --- a/coaster/tests/framework_opencl_specs.rs +++ b/coaster/tests/framework_opencl_specs.rs @@ -4,10 +4,10 @@ extern crate libc; #[cfg(test)] #[cfg(feature = "opencl")] mod framework_opencl_spec { - use co::prelude::*; + use co::frameworks::opencl::context::*; use co::frameworks::opencl::memory::*; use co::frameworks::opencl::queue::*; - use co::frameworks::opencl::context::*; + use co::prelude::*; #[test] fn it_works() { @@ -41,9 +41,21 @@ mod framework_opencl_spec { fn it_queries_context_info() { let frm = OpenCL::new(); let ctx = frm.new_device(&frm.hardwares()[0..1]).unwrap(); - println!("ReferenceCount: {:?}", ctx.get_context_info(ContextInfoQuery::ReferenceCount)); - println!("NumDevices: {:?}", ctx.get_context_info(ContextInfoQuery::NumDevices)); - println!("Devices: {:?}", ctx.get_context_info(ContextInfoQuery::Devices)); - println!("Properties: {:?}", ctx.get_context_info(ContextInfoQuery::Properties)); - } + println!( + "ReferenceCount: {:?}", + ctx.get_context_info(ContextInfoQuery::ReferenceCount) + ); + println!( + "NumDevices: {:?}", + ctx.get_context_info(ContextInfoQuery::NumDevices) + ); + println!( + "Devices: {:?}", + ctx.get_context_info(ContextInfoQuery::Devices) + ); + println!( + "Properties: {:?}", + ctx.get_context_info(ContextInfoQuery::Properties) + ); + } } diff --git 
a/coaster/tests/hardware_specs.rs b/coaster/tests/hardware_specs.rs index 86b678522..b9a725119 100644 --- a/coaster/tests/hardware_specs.rs +++ b/coaster/tests/hardware_specs.rs @@ -4,8 +4,8 @@ extern crate libc; #[cfg(test)] #[cfg(feature = "opencl")] mod hardware_spec { - use co::prelude::*; use co::frameworks::opencl::Device; + use co::prelude::*; #[test] fn it_works() { @@ -30,7 +30,7 @@ mod hardware_spec { assert!(match hardware.hardware_type() { Some(HardwareType::CPU) => true, - _ => false + _ => false, }); } @@ -42,19 +42,17 @@ mod hardware_spec { assert!(match hardware.name() { Some(_) => true, - _ => false + _ => false, }); } #[test] fn it_sets_compute_units() { - let hardware = Device::from_isize(42) - .set_compute_units(Some(400)) - .build(); + let hardware = Device::from_isize(42).set_compute_units(Some(400)).build(); assert!(match hardware.compute_units() { Some(400) => true, - _ => false + _ => false, }); } } diff --git a/coaster/tests/shared_memory_specs.rs b/coaster/tests/shared_memory_specs.rs index e65699fb2..9017283f6 100644 --- a/coaster/tests/shared_memory_specs.rs +++ b/coaster/tests/shared_memory_specs.rs @@ -3,9 +3,9 @@ use libc; #[cfg(test)] mod shared_memory_spec { + use super::co::frameworks::native::flatbox::FlatBox; use super::co::prelude::*; use super::co::tensor::Error; - use super::co::frameworks::native::flatbox::FlatBox; #[cfg(features = "cuda")] fn write_to_memory(mem: &mut FlatBox, data: &[T]) { @@ -49,16 +49,22 @@ mod shared_memory_spec { let ntv = Native::new(); let cpu = ntv.new_device(ntv.hardwares()).unwrap(); let mut shared_data = SharedTensor::::new(&10); - assert_eq!(shared_data.read(&cpu).unwrap_err(), - Error::UninitializedMemory); - assert_eq!(shared_data.read_write(&cpu).unwrap_err(), - Error::UninitializedMemory); + assert_eq!( + shared_data.read(&cpu).unwrap_err(), + Error::UninitializedMemory + ); + assert_eq!( + shared_data.read_write(&cpu).unwrap_err(), + Error::UninitializedMemory + ); shared_data.write_only(&cpu).unwrap(); shared_data.drop(&cpu).unwrap(); - assert_eq!(shared_data.read(&cpu).unwrap_err(), - Error::UninitializedMemory); + assert_eq!( + shared_data.read(&cpu).unwrap_err(), + Error::UninitializedMemory + ); } #[test] @@ -69,15 +75,16 @@ mod shared_memory_spec { let cu_device = cu.new_device(&cu.hardwares()[0..1]).unwrap(); let nt_device = nt.new_device(nt.hardwares()).unwrap(); let mut mem = SharedTensor::::new(&3); - write_to_memory(mem.write_only(&nt_device).unwrap(), - &[1.0f64, 2.0, 123.456]); + write_to_memory(mem.write_only(&nt_device).unwrap(), &[1.0f64, 2.0, 123.456]); mem.read(&cu_device).unwrap(); // It has successfully synced to the device. // Not the other way around. mem.drop(&nt_device).unwrap(); - assert_eq!(mem.read(&nt_device).unwrap().as_slice::(), - [1.0, 2.0, 123.456]); + assert_eq!( + mem.read(&nt_device).unwrap().as_slice::(), + [1.0, 2.0, 123.456] + ); } #[test] @@ -88,15 +95,16 @@ mod shared_memory_spec { let cl_device = cl.new_device(&cl.hardwares()[0..1]).unwrap(); let nt_device = nt.new_device(nt.hardwares()).unwrap(); let mut mem = SharedTensor::::new(&3); - write_to_memory(mem.write_only(&nt_device).unwrap(), - &[1.0f64, 2.0, 123.456]); + write_to_memory(mem.write_only(&nt_device).unwrap(), &[1.0f64, 2.0, 123.456]); mem.read(&cl_device).unwrap(); // It has not successfully synced to the device. // Not the other way around. 
mem.drop(&nt_device).unwrap(); - assert_eq!(mem.read(&nt_device).unwrap().as_slice::(), - [1.0, 2.0, 123.456]); + assert_eq!( + mem.read(&nt_device).unwrap().as_slice::(), + [1.0, 2.0, 123.456] + ); } #[test] diff --git a/coaster/tests/tensor_specs.rs b/coaster/tests/tensor_specs.rs index 1c6e747e4..1f4cb5b97 100644 --- a/coaster/tests/tensor_specs.rs +++ b/coaster/tests/tensor_specs.rs @@ -6,12 +6,12 @@ mod tensor_spec { #[test] fn it_returns_correct_tensor_desc_stride() { - let tensor_desc_r0: TensorDesc = vec!(); - let tensor_desc_r1: TensorDesc = vec!(5); - let tensor_desc_r2: TensorDesc = vec!(2, 4); - let tensor_desc_r3: TensorDesc = vec!(2, 2, 4); - let tensor_desc_r4: TensorDesc = vec!(2, 2, 4, 4); - let r0: Vec = vec!(); + let tensor_desc_r0: TensorDesc = vec![]; + let tensor_desc_r1: TensorDesc = vec![5]; + let tensor_desc_r2: TensorDesc = vec![2, 4]; + let tensor_desc_r3: TensorDesc = vec![2, 2, 4]; + let tensor_desc_r4: TensorDesc = vec![2, 2, 4, 4]; + let r0: Vec = vec![]; assert_eq!(r0, tensor_desc_r0.default_stride()); assert_eq!(vec![1], tensor_desc_r1.default_stride()); assert_eq!(vec![4, 1], tensor_desc_r2.default_stride()); @@ -22,7 +22,7 @@ mod tensor_spec { #[test] fn it_returns_correct_size_for_rank_0() { // In order for correct memory allocation of scala Tensor, the size should never return less than 1. - let tensor_desc_r0: TensorDesc = vec!(); + let tensor_desc_r0: TensorDesc = vec![]; assert_eq!(1, tensor_desc_r0.size()); let tensor_desc_r0_into = <() as IntoTensorDesc>::into(&()); diff --git a/greenglas/src/image/mod.rs b/greenglas/src/image/mod.rs index ab05215d8..506f96f6d 100644 --- a/greenglas/src/image/mod.rs +++ b/greenglas/src/image/mod.rs @@ -1,9 +1,9 @@ +use crate::image_crate::{load_from_memory, open, DynamicImage, ImageBuffer}; use std::path::Path; -use crate::image_crate::{DynamicImage, ImageBuffer, open, load_from_memory}; -use crate::{Set, Transformer}; -use crate::transformer::TransformerError; pub use self::modifiers::*; +use crate::transformer::TransformerError; +use crate::{Set, Transformer}; /// The Modifiers form `Image` pub mod modifiers; @@ -23,25 +23,26 @@ impl Transformer for Image { } impl Image { - /// Create a new Image from a DynamicImage pub fn new(image: DynamicImage) -> Image { - Image { - value: image - } + Image { value: image } } /// Create a new Image from a Path pub fn from_path
<P>
(path: P) -> Image - where P: AsRef + where + P: AsRef, { - Image { value: open(path).unwrap() } + Image { + value: open(path).unwrap(), + } } /// Create a new Image from Buffer - pub fn from_buffer(buf: &[u8]) -> Image - { - Image { value: load_from_memory(buf).unwrap() } + pub fn from_buffer(buf: &[u8]) -> Image { + Image { + value: load_from_memory(buf).unwrap(), + } } /// Create a new Image from RGB style pixel container such as `Vec` @@ -49,7 +50,7 @@ impl Image { let dynamic_image = ImageBuffer::from_raw(w, h, buf).map(DynamicImage::ImageRgb8); match dynamic_image { Some(image) => Ok(Image { value: image }), - None => Err(TransformerError::InvalidRgbPixels) + None => Err(TransformerError::InvalidRgbPixels), } } @@ -58,7 +59,7 @@ impl Image { let dynamic_image = ImageBuffer::from_raw(w, h, buf).map(DynamicImage::ImageRgba8); match dynamic_image { Some(image) => Ok(Image { value: image }), - None => Err(TransformerError::InvalidRgbaPixels) + None => Err(TransformerError::InvalidRgbaPixels), } } @@ -67,7 +68,7 @@ impl Image { let dynamic_image = ImageBuffer::from_raw(w, h, buf).map(DynamicImage::ImageLuma8); match dynamic_image { Some(image) => Ok(Image { value: image }), - None => Err(TransformerError::InvalidLumaPixels) + None => Err(TransformerError::InvalidLumaPixels), } } @@ -76,7 +77,7 @@ impl Image { let dynamic_image = ImageBuffer::from_raw(w, h, buf).map(DynamicImage::ImageLumaA8); match dynamic_image { Some(image) => Ok(Image { value: image }), - None => Err(TransformerError::InvalidLumaAlphaPixels) + None => Err(TransformerError::InvalidLumaAlphaPixels), } } } diff --git a/greenglas/src/image/modifiers.rs b/greenglas/src/image/modifiers.rs index a5b38f1e0..a501dc56e 100644 --- a/greenglas/src/image/modifiers.rs +++ b/greenglas/src/image/modifiers.rs @@ -1,6 +1,6 @@ -use crate::modifier::Modifier; -use crate::image_crate::imageops::FilterType; use super::Image; +use crate::image_crate::imageops::FilterType; +use crate::modifier::Modifier; #[derive(Debug, Clone, Copy)] /// Resize Modifier for `Image` @@ -13,7 +13,9 @@ pub struct Resize { impl Modifier for Resize { fn modify(self, image: &mut Image) { - image.value = image.value.resize(self.width, self.height, FilterType::Triangle) + image.value = image + .value + .resize(self.width, self.height, FilterType::Triangle) } } diff --git a/greenglas/src/lib.rs b/greenglas/src/lib.rs index e1fa0d338..3debeecab 100644 --- a/greenglas/src/lib.rs +++ b/greenglas/src/lib.rs @@ -14,23 +14,22 @@ )] extern crate coaster as co; -extern crate murmurhash3 as murmur3; extern crate image as image_crate; +extern crate murmurhash3 as murmur3; pub use crate::image::Image; -pub use crate::word::Word; pub use crate::transformer::Transformer; +pub use crate::word::Word; pub use crate::modifier::Set; -/// Transformer -pub mod transformer; /// The Image Struct and its Modifiers pub mod image; +/// Transformer +pub mod transformer; /// The Word Struct and its Modifiers pub mod word; - /// Re-exports from the modifier crate. pub mod modifier { extern crate modifier as modifier_crate; diff --git a/greenglas/src/transformer.rs b/greenglas/src/transformer.rs index edc286d97..dc72e4494 100644 --- a/greenglas/src/transformer.rs +++ b/greenglas/src/transformer.rs @@ -1,12 +1,11 @@ -use crate::co::prelude::*; use crate::co::plugin::numeric_helpers::*; +use crate::co::prelude::*; /// The Transformer Trait /// /// Gets implemented for all Transformable Data Types. /// Allows all Transformable Data Types to get transformed into a `Blob`. 
pub trait Transformer { - /// Transforms non-numeric data into a numeric `SharedTensor` /// /// The shape attribute is used to control the dimensions/shape of the Blob. @@ -27,18 +26,27 @@ pub trait Transformer { fn transform_to_vec(&self) -> Vec; /// Write into a native Coaster Memory. - fn write_to_memory(mem: &mut FlatBox, data: &[T]) -> Result<(), TransformerError> { + fn write_to_memory( + mem: &mut FlatBox, + data: &[T], + ) -> Result<(), TransformerError> { Self::write_to_memory_offset(mem, data, 0) } /// Write into a native Coaster Memory with a offset. - fn write_to_memory_offset(mem: &mut FlatBox, data: &[T], offset: usize) -> Result<(), TransformerError> { + fn write_to_memory_offset( + mem: &mut FlatBox, + data: &[T], + offset: usize, + ) -> Result<(), TransformerError> { let mem_buffer = mem.as_mut_slice::(); if offset == 0 && mem_buffer.len() != data.len() { return Err(TransformerError::InvalidShape); } for (index, datum) in data.iter().enumerate() { - let old_val = mem_buffer.get_mut(index + offset).ok_or(TransformerError::InvalidShape)?; + let old_val = mem_buffer + .get_mut(index + offset) + .ok_or(TransformerError::InvalidShape)?; *old_val = cast(*datum).unwrap(); } Ok(()) diff --git a/greenglas/src/word/mod.rs b/greenglas/src/word/mod.rs index a63fd340f..1fcf89377 100644 --- a/greenglas/src/word/mod.rs +++ b/greenglas/src/word/mod.rs @@ -1,7 +1,6 @@ use crate::murmur3::murmurhash3_x86_32 as murmur3; use crate::{Set, Transformer}; - /// The Modifiers for `Word` pub mod modifiers; diff --git a/greenglas/src/word/modifiers.rs b/greenglas/src/word/modifiers.rs index e69de29bb..8b1378917 100644 --- a/greenglas/src/word/modifiers.rs +++ b/greenglas/src/word/modifiers.rs @@ -0,0 +1 @@ + diff --git a/greenglas/tests/image_spec.rs b/greenglas/tests/image_spec.rs index 4619c5b23..76ad72bcd 100644 --- a/greenglas/tests/image_spec.rs +++ b/greenglas/tests/image_spec.rs @@ -3,20 +3,29 @@ extern crate greenglas; #[cfg(test)] mod image_spec { - use greenglas::{Set, Transformer, Image}; - use greenglas::image::{Resize, Crop}; + use greenglas::image::{Crop, Resize}; + use greenglas::{Image, Set, Transformer}; use std::path::Path; fn expected_result() -> Vec { - vec![255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 0.0, 0.0, 0.0] + vec![ + 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 0.0, 0.0, 0.0, + ] } fn expected_result_with_alpha() -> Vec { - vec![255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 0.0, 0.0, 0.0, 255.0] + vec![ + 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, + 0.0, 0.0, 0.0, 255.0, + ] } - fn expected_result_resize() -> Vec { vec![191.0, 191.0, 191.0] } - fn expected_result_crop() -> Vec { vec![255.0, 255.0, 255.0] } + fn expected_result_resize() -> Vec { + vec![191.0, 191.0, 191.0] + } + fn expected_result_crop() -> Vec { + vec![255.0, 255.0, 255.0] + } #[test] fn it_works_for_pixels_rgb() { @@ -24,17 +33,19 @@ mod image_spec { let img = Image::from_rgb_pixels(2, 2, buffer); match img { Ok(i) => assert_eq!(expected_result(), i.transform_to_vec()), - Err(_) => assert!(false) + Err(_) => assert!(false), } } #[test] fn it_works_for_pixels_rgba() { - let buffer: Vec = vec![255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 255]; + let buffer: Vec = vec![ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 255, + ]; let img = Image::from_rgba_pixels(2, 2, buffer); match img { Ok(i) => assert_eq!(expected_result_with_alpha(), 
i.transform_to_vec()), - Err(_) => assert!(false) + Err(_) => assert!(false), } } @@ -44,7 +55,7 @@ mod image_spec { let img = Image::from_luma_pixels(3, 4, buffer); match img { Ok(i) => assert_eq!(expected_result(), i.transform_to_vec()), - Err(_) => assert!(false) + Err(_) => assert!(false), } } @@ -54,7 +65,7 @@ mod image_spec { let img = Image::from_lumaa_pixels(3, 2, buffer); match img { Ok(i) => assert_eq!(expected_result(), i.transform_to_vec()), - Err(_) => assert!(false) + Err(_) => assert!(false), } } @@ -98,7 +109,10 @@ mod image_spec { fn it_works_to_resize() { let path = Path::new("tests/assets/test_image.png"); let mut img = Image::from_path(&path); - let resize = Resize { width: 1, height: 1 }; + let resize = Resize { + width: 1, + height: 1, + }; img = img.set(resize); assert_eq!(expected_result_resize(), img.transform_to_vec()); } @@ -107,7 +121,12 @@ mod image_spec { fn it_works_to_crop() { let path = Path::new("tests/assets/test_image.png"); let mut img = Image::from_path(&path); - let crop = Crop { x: 0, y: 0, width: 1, height: 1 }; + let crop = Crop { + x: 0, + y: 0, + width: 1, + height: 1, + }; img = img.set(crop); assert_eq!(expected_result_crop(), img.transform_to_vec()); } diff --git a/greenglas/tests/transformer_spec.rs b/greenglas/tests/transformer_spec.rs index 7e8385453..e9e5f5500 100644 --- a/greenglas/tests/transformer_spec.rs +++ b/greenglas/tests/transformer_spec.rs @@ -1,17 +1,19 @@ -extern crate greenglas; extern crate coaster; +extern crate greenglas; #[cfg(test)] mod transformer_spec { - use greenglas::{Set, Transformer, Image}; - use greenglas::image::{Crop}; - use greenglas::transformer::TransformerError; use coaster::prelude::*; + use greenglas::image::Crop; + use greenglas::transformer::TransformerError; + use greenglas::{Image, Set, Transformer}; use std::path::Path; fn expected_result() -> Vec { - vec![255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 0.0, 0.0, 0.0] + vec![ + 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 255.0, 0.0, 0.0, 0.0, + ] } #[test] @@ -20,7 +22,7 @@ mod transformer_spec { let img = Image::from_path(&path); match img.transform(&vec![2, 2, 3]) { Ok(_) => assert!(true), - _ => assert!(false) + _ => assert!(false), } } @@ -33,8 +35,8 @@ mod transformer_spec { let native_backend = Backend::::default().unwrap(); let data = tensor.read(native_backend.device()).unwrap().as_slice(); assert_eq!(expected_result(), data); - }, - _ => assert!(false) + } + _ => assert!(false), } } @@ -44,7 +46,7 @@ mod transformer_spec { let img = Image::from_path(&path); match img.transform(&vec![3, 3, 3]) { Err(TransformerError::InvalidShape) => assert!(true), - _ => assert!(false) + _ => assert!(false), } } @@ -52,10 +54,15 @@ mod transformer_spec { fn transform_returns_a_valid_result_with_modifiers() { let path = Path::new("tests/assets/test_image.png"); let img = Image::from_path(&path); - let crop = Crop { x: 0, y: 0, width: 1, height: 1 }; + let crop = Crop { + x: 0, + y: 0, + width: 1, + height: 1, + }; match img.set(crop).transform(&vec![1, 1, 3]) { Ok(_) => assert!(true), - _ => assert!(false) + _ => assert!(false), } } } diff --git a/greenglas/tests/word_spec.rs b/greenglas/tests/word_spec.rs index 350647314..6de1290c6 100644 --- a/greenglas/tests/word_spec.rs +++ b/greenglas/tests/word_spec.rs @@ -3,7 +3,7 @@ extern crate greenglas; #[cfg(test)] mod word_spec { - use greenglas::{ Transformer, Word }; + use greenglas::{Transformer, Word}; fn expected_result() -> Vec { vec![3127628307.0] @@ -11,6 +11,9 @@ mod word_spec 
{ #[test] fn it_works() { - assert_eq!(expected_result(), Word::new("test".to_string()).transform_to_vec()); + assert_eq!( + expected_result(), + Word::new("test".to_string()).transform_to_vec() + ); } } diff --git a/juice-examples/juice-utils/src/lib.rs b/juice-examples/juice-utils/src/lib.rs index 15e94c75e..f66599c24 100644 --- a/juice-examples/juice-utils/src/lib.rs +++ b/juice-examples/juice-utils/src/lib.rs @@ -1,10 +1,14 @@ use flate2::read::GzDecoder; -use reqwest::blocking::Client; use fs_err as fs; -use std::io::prelude::*; +use reqwest::blocking::Client; use std::io; +use std::io::prelude::*; -pub fn download_datasets(datasets: &[&str], asset_path: &str, base_url: &str) -> Result<(), Box> { +pub fn download_datasets( + datasets: &[&str], + asset_path: &str, + base_url: &str, +) -> Result<(), Box> { let client = Client::new(); std::fs::create_dir_all(asset_path)?; for dataset in datasets { diff --git a/juice-examples/mackey-glass-rnn-regression/src/main.rs b/juice-examples/mackey-glass-rnn-regression/src/main.rs index 0cdc6c2a7..078b95f45 100644 --- a/juice-examples/mackey-glass-rnn-regression/src/main.rs +++ b/juice-examples/mackey-glass-rnn-regression/src/main.rs @@ -64,7 +64,6 @@ const DATA_COLUMNS: usize = 10; // Provide an Iterator over the input data fn data_generator(data: DataMode) -> impl Iterator)> { - let rdr = Reader::from_reader(File::open(data.as_path()).unwrap()); rdr.into_deserialize().map(move |row| match row { Ok(value) => { diff --git a/juice-examples/mnist-image-multiclass-classification/src/main.rs b/juice-examples/mnist-image-multiclass-classification/src/main.rs index ba5def931..d306d318c 100644 --- a/juice-examples/mnist-image-multiclass-classification/src/main.rs +++ b/juice-examples/mnist-image-multiclass-classification/src/main.rs @@ -72,12 +72,14 @@ fn main() { "train-images-idx3-ubyte.gz", "train-labels-idx1-ubyte.gz", "t10k-images-idx3-ubyte.gz", - "t10k-labels-idx1-ubyte.gz" + "t10k-labels-idx1-ubyte.gz", ]; download_datasets( &datasets, &"./assets/mnist/", - "http://yann.lecun.com/exdb/mnist/").unwrap(); + "http://yann.lecun.com/exdb/mnist/", + ) + .unwrap(); unzip_datasets(&datasets, &"./assets/mnist/").unwrap(); } @@ -94,7 +96,8 @@ fn main() { &datasets, &"./assets/mnist-fashion/", "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com", - ).unwrap(); + ) + .unwrap(); println!("{}", "Fashion MNIST dataset downloaded".to_string()); unzip_datasets(&datasets, &"./assets/mnist-fashion/").unwrap(); @@ -104,7 +107,7 @@ fn main() { } } else if args.cmd_mnist { #[cfg(all(feature = "cuda"))] - run_mnist( + run_mnist( MnistType::Numbers, args.arg_model_name, args.arg_batch_size, @@ -112,12 +115,12 @@ fn main() { args.arg_momentum, ); #[cfg(not(feature = "cuda"))] - { - println!( + { + println!( "Right now, you really need cuda! Not all features are available for all backends and as such, this one -as of now - only works with cuda." 
); - panic!() - } + panic!() + } } else if args.cmd_fashion { run_mnist( MnistType::Fashion, @@ -130,7 +133,11 @@ fn main() { } #[cfg(all(feature = "cuda"))] -fn add_conv_net(mut net_cfg: SequentialConfig, batch_size: usize, pixel_dim: usize) -> SequentialConfig { +fn add_conv_net( + mut net_cfg: SequentialConfig, + batch_size: usize, + pixel_dim: usize, +) -> SequentialConfig { net_cfg.add_layer(LayerConfig::new( "reshape", ReshapeConfig::of_shape(&[batch_size, 1, pixel_dim, pixel_dim]), @@ -166,7 +173,11 @@ fn add_conv_net(mut net_cfg: SequentialConfig, batch_size: usize, pixel_dim: usi } #[cfg(not(feature = "cuda"))] -fn add_conv_net(_net_cfg: SequentialConfig, _batch_size: usize, _pixel_dim: usize) -> SequentialConfig { +fn add_conv_net( + _net_cfg: SequentialConfig, + _batch_size: usize, + _pixel_dim: usize, +) -> SequentialConfig { println!( "Currently Juice does not have a native pooling function to use with Conv Nets - you can either try the CUDA implementation, or use a different type of layer" @@ -174,7 +185,11 @@ fn add_conv_net(_net_cfg: SequentialConfig, _batch_size: usize, _pixel_dim: usiz panic!() } -fn add_mlp(mut net_cfg: SequentialConfig, batch_size: usize, pixel_count: usize) -> SequentialConfig { +fn add_mlp( + mut net_cfg: SequentialConfig, + batch_size: usize, + pixel_count: usize, +) -> SequentialConfig { net_cfg.add_layer(LayerConfig::new( "reshape", LayerType::Reshape(ReshapeConfig::of_shape(&[batch_size, pixel_count])), @@ -199,7 +214,6 @@ fn add_linear_net(mut net_cfg: SequentialConfig) -> SequentialConfig { net_cfg } - fn run_mnist( mnist_type: MnistType, model_name: Option, @@ -214,7 +228,7 @@ fn run_mnist( let asset_path = match mnist_type { MnistType::Fashion => "./assets/mnist-fashion", - MnistType::Numbers => "./assets/mnist" + MnistType::Numbers => "./assets/mnist", }; let Mnist { @@ -258,9 +272,9 @@ fn run_mnist( // set up backends #[cfg(all(feature = "cuda"))] - let backend = Rc::new(get_cuda_backend()); + let backend = Rc::new(get_cuda_backend()); #[cfg(not(feature = "cuda"))] - let backend = Rc::new(get_native_backend()); + let backend = Rc::new(get_native_backend()); // set up solver let mut solver_cfg = SolverConfig { @@ -288,7 +302,7 @@ fn run_mnist( let mut targets = Vec::new(); for (batch_n, (label_val, ref input)) in - decoded_images.by_ref().take(batch_size).enumerate() + decoded_images.by_ref().take(batch_size).enumerate() { let mut input_tensor = inp_lock.write().unwrap(); let mut label_tensor = label_lock.write().unwrap(); diff --git a/juice/src/layer.rs b/juice/src/layer.rs index dad9fbcce..fe62ff1c3 100644 --- a/juice/src/layer.rs +++ b/juice/src/layer.rs @@ -539,8 +539,7 @@ impl Layer { if old_shape.size() != reshaped_shape.size() { panic!( "Input Shape Mismatch\nExpected {:?}\nActual {:?}", - reshaped_shape, - old_shape + reshaped_shape, old_shape ); } self.input_blobs_data[input_i] diff --git a/juice/src/layers/common/convolution.rs b/juice/src/layers/common/convolution.rs index 24f194809..d0c2b32f4 100644 --- a/juice/src/layers/common/convolution.rs +++ b/juice/src/layers/common/convolution.rs @@ -21,7 +21,7 @@ use crate::conn; use crate::conn::ConvolutionConfig as connConvolutionConfig; use crate::juice_capnp::convolution_config as capnp_config; use crate::layer::*; -use crate::util::{ArcLock, cast_vec_usize_to_i32}; +use crate::util::{cast_vec_usize_to_i32, ArcLock}; use crate::weight::FillerType; use super::FilterLayer; @@ -359,8 +359,8 @@ impl<'a> CapnpRead<'a> for ConvolutionConfig { mod tests { use crate::co::*; - use 
super::{Convolution, ConvolutionConfig}; use super::super::FilterLayer; + use super::{Convolution, ConvolutionConfig}; #[test] #[cfg(feature = "cuda")] diff --git a/juice/src/layers/common/linear.rs b/juice/src/layers/common/linear.rs index a22352ac2..7f09bfda4 100644 --- a/juice/src/layers/common/linear.rs +++ b/juice/src/layers/common/linear.rs @@ -109,7 +109,7 @@ impl> ILayer for Linear { // is stated in https://cs231n.github.io/neural-networks-2/#init for non-LSTM types. let initialisation_constant = rand::random::(); let filler = FillerType::Constant { - value: initialisation_constant * (2.0 / initialisation_constant).sqrt() + value: initialisation_constant * (2.0 / initialisation_constant).sqrt(), }; filler.fill(&mut weight.write().unwrap()); } diff --git a/juice/src/layers/common/rnn.rs b/juice/src/layers/common/rnn.rs index 7e59a9596..1f77e0de3 100644 --- a/juice/src/layers/common/rnn.rs +++ b/juice/src/layers/common/rnn.rs @@ -47,7 +47,7 @@ use crate::conn; use crate::conn::RnnConfig as connRnnConfig; use crate::juice_capnp::rnn_config as capnp_config; use crate::layer::*; -use crate::util::{ArcLock, native_backend}; +use crate::util::{native_backend, ArcLock}; use crate::weight::FillerType; #[derive(Debug, Clone)] @@ -137,10 +137,7 @@ impl> ILayer for Rnn { .unwrap(); let filter_dimensions: TensorDesc = backend - .generate_rnn_weight_description( - &config, - batch_size as i32, - input_size as i32) + .generate_rnn_weight_description(&config, batch_size as i32, input_size as i32) .unwrap(); weights_data[0].write().unwrap().resize(&filter_dimensions).unwrap(); @@ -151,9 +148,7 @@ impl> ILayer for Rnn { output_size: batch_size * self.num_layers * self.hidden_size, }; - let bias_filler = FillerType::Constant { - value: 1.0 - }; + let bias_filler = FillerType::Constant { value: 1.0 }; filler.fill(&mut weights_data[0].write().unwrap()); bias_filler.fill(&mut weights_data[1].write().unwrap()); @@ -204,11 +199,7 @@ impl> ComputeOutput for Rnn { let rnn_config = self.rnn_config.as_ref().unwrap(); let mut workspace = self.workspace.as_ref().unwrap().write().unwrap(); backend - .rnn_forward(&input_data[0], - output_data[0], - rnn_config, - weights[0], - &mut workspace) + .rnn_forward(&input_data[0], output_data[0], rnn_config, weights[0], &mut workspace) .unwrap(); } } @@ -232,9 +223,7 @@ impl> ComputeInputGradient for Rnn { let input_size = input_shape[1]; let sequence_length = input_shape[2]; let native_backend = native_backend(); - let readable_input = src - .read(native_backend.device()).unwrap() - .as_slice::().to_vec(); + let readable_input = src.read(native_backend.device()).unwrap().as_slice::().to_vec(); backend .rnn_backward_data( @@ -359,12 +348,12 @@ impl<'a> CapnpRead<'a> for RnnConfig { mod tests { use std::rc::Rc; - use conn::{DirectionMode, RnnAlgorithm, RnnInputMode, RnnNetworkMode}; use conn::Rnn as coRnn; + use conn::{DirectionMode, RnnAlgorithm, RnnInputMode, RnnNetworkMode}; - use crate::co::*; #[cfg(feature = "cuda")] use crate::co::frameworks::cuda::get_cuda_backend as cuda_backend; + use crate::co::*; use crate::layer::ILayer; use crate::util::native_backend; use crate::weight::FillerType; diff --git a/juice/src/layers/loss/mean_squared_error.rs b/juice/src/layers/loss/mean_squared_error.rs index 7964dfb6d..e9410066e 100644 --- a/juice/src/layers/loss/mean_squared_error.rs +++ b/juice/src/layers/loss/mean_squared_error.rs @@ -98,7 +98,7 @@ impl> ComputeInputGradient for MeanSquaredEr &native_scalar(-2f32), &mut writable_input, ) - .unwrap(); + .unwrap(); 
write_to_memory( input_gradients[0].write_only(native.device()).unwrap(), diff --git a/juice/src/util.rs b/juice/src/util.rs index bfee7acf9..581fb9f58 100644 --- a/juice/src/util.rs +++ b/juice/src/util.rs @@ -41,9 +41,7 @@ pub fn write_to_memory_offset(mem: &mut FlatBo /// is assumed to be the batchsize. /// /// Allocates memory on a Native Backend if neccessary. -pub fn write_batch_sample( - tensor: &mut SharedTensor, - data: &[T], i: usize) { +pub fn write_batch_sample(tensor: &mut SharedTensor, data: &[T], i: usize) { let native_backend = native_backend(); let tensor_desc = tensor.desc(); let batch_size = tensor_desc[0]; diff --git a/juice/tests/layer_specs.rs b/juice/tests/layer_specs.rs index 971a026ee..b05f7a2f5 100644 --- a/juice/tests/layer_specs.rs +++ b/juice/tests/layer_specs.rs @@ -177,7 +177,6 @@ mod layer_spec { assert_slice_eq!(&[0.51], &[0.51], 0.00000001); } - #[test] #[should_panic] fn macro_test_assert_slice_eq_not() { diff --git a/magic.yml b/magic.yml new file mode 100644 index 000000000..23c2738ab --- /dev/null +++ b/magic.yml @@ -0,0 +1,18 @@ +platform: linux + +image_resource: + type: registry-image + source: + repository: quay.io/spearow/machine-learning-container-fedora-cuda + tag: latest + +inputs: +- name: juice + +caches: +- path: cargo_home + +run: + path: bash + dir: juice + diff --git a/rcublas/cublas-sys/build.rs b/rcublas/cublas-sys/build.rs index 45083b18a..34d3f78d4 100644 --- a/rcublas/cublas-sys/build.rs +++ b/rcublas/cublas-sys/build.rs @@ -55,14 +55,13 @@ fn main() { println!("cargo:rerun-if-changed=build.rs"); - #[cfg(feature="generate")] + #[cfg(feature = "generate")] { println!("cargo:warning=Running bindgen(cublas-sys), make sure to have all required host libs installed!"); use std::path::PathBuf; - let include_dir = include_dir - .unwrap_or_else(|| String::from("/usr/include/cuda")); + let include_dir = include_dir.unwrap_or_else(|| String::from("/usr/include/cuda")); let bindings = bindgen::Builder::default() .rust_target(bindgen::RustTarget::Stable_1_40) @@ -79,13 +78,13 @@ fn main() { .size_t_is_usize(true) .clang_arg("-I") .clang_arg(include_dir) - .header( "wrapper.h") + .header("wrapper.h") .rustified_non_exhaustive_enum("cublas[A-Za-z]+_t") .rustified_non_exhaustive_enum("cuda.*") .whitelist_function("cu.*") .whitelist_var("CUBLAS.*") .whitelist_type("[Cc][Uu].*") - .default_alias_style(bindgen::AliasVariation::TypeAlias ) + .default_alias_style(bindgen::AliasVariation::TypeAlias) .parse_callbacks(Box::new(bindgen::CargoCallbacks)) .rustfmt_bindings(true) .generate() diff --git a/rcublas/cublas-sys/src/generated.rs b/rcublas/cublas-sys/src/generated.rs index eff59ad96..b450204c4 100644 --- a/rcublas/cublas-sys/src/generated.rs +++ b/rcublas/cublas-sys/src/generated.rs @@ -1,12 +1,10 @@ /* automatically generated by rust-bindgen */ - //! Defines the FFI for CUDA cuBLAS. //! #![allow(non_camel_case_types)] #![allow(non_snake_case)] #![allow(non_upper_case_globals)] - pub const CUBLAS_VER_MAJOR: u32 = 11; pub const CUBLAS_VER_MINOR: u32 = 4; diff --git a/rcublas/cublas/src/api/context.rs b/rcublas/cublas/src/api/context.rs index 3370af584..aae8c714d 100644 --- a/rcublas/cublas/src/api/context.rs +++ b/rcublas/cublas/src/api/context.rs @@ -1,6 +1,6 @@ -use crate::ffi::*; -use crate::{API, Error}; use super::{Operation, PointerMode}; +use crate::ffi::*; +use crate::{Error, API}; #[derive(Debug, Clone)] /// Provides a the low-level cuBLAS context. 
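One practical consequence of this refactor, before the next hunk: since `Error` now derives `thiserror::Error`, it implements `std::error::Error` and `Display`, so callers can propagate cuBLAS failures with `?` instead of unwrapping at every call site. A minimal sketch, assuming the `API::create`/`API::get_version` signatures that appear later in this patch, and assuming that dropping a `Context` releases its handle (which the track/untrack bookkeeping below suggests):

    use rcublas::error::Error;
    use rcublas::{Context, API};

    // Create a context, query the library version, and let `?` bubble the
    // typed error up to the caller instead of panicking here.
    fn cublas_version() -> Result<i32, Error> {
        let context: Context = API::create()?;
        let version = API::get_version(&context)?;
        Ok(version)
    }
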
@@ -147,34 +147,19 @@ impl Context { ldc: i32, ) -> Result<(), Error> { API::gemm( - self, - transa, - transb, - m, - n, - k, - alpha, - a, - lda, - b, - ldb, - beta, - c, - ldc, + self, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, ) } - pub fn get_version( - &self - ) -> i32 { + pub fn get_version(&self) -> i32 { API::get_version(self).unwrap() } } #[cfg(test)] mod test { - use super::*; use super::super::PointerMode; + use super::*; use crate::chore::*; #[test] diff --git a/rcublas/cublas/src/api/enums.rs b/rcublas/cublas/src/api/enums.rs index b663ad7fb..9d6384096 100644 --- a/rcublas/cublas/src/api/enums.rs +++ b/rcublas/cublas/src/api/enums.rs @@ -11,7 +11,7 @@ impl PointerMode { match in_mode { cublasPointerMode_t::CUBLAS_POINTER_MODE_HOST => PointerMode::Host, cublasPointerMode_t::CUBLAS_POINTER_MODE_DEVICE => PointerMode::Device, - _ => unreachable!("wrapping library is newer than this impl, please file a BUG") + _ => unreachable!("wrapping library is newer than this impl, please file a BUG"), } } @@ -36,7 +36,7 @@ impl Operation { cublasOperation_t::CUBLAS_OP_N => Operation::NoTrans, cublasOperation_t::CUBLAS_OP_T => Operation::Trans, cublasOperation_t::CUBLAS_OP_C => Operation::ConjTrans, - _ => unreachable!("wrapping library is newer than this impl, please file a BUG") + _ => unreachable!("wrapping library is newer than this impl, please file a BUG"), } } diff --git a/rcublas/cublas/src/api/level1.rs b/rcublas/cublas/src/api/level1.rs index b75041a29..4b14b6b78 100644 --- a/rcublas/cublas/src/api/level1.rs +++ b/rcublas/cublas/src/api/level1.rs @@ -1,6 +1,6 @@ -use crate::{API, Error}; use super::Context; use crate::ffi::*; +use crate::{Error, API}; impl API { // TODO: cublasIsamax_v2 x 4 @@ -38,8 +38,10 @@ impl API { cublasStatus_t::CUBLAS_STATUS_ALLOC_FAILED => Err(Error::AllocFailed), cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch), cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed), - status => Err(Error::Unknown("Unable to calculate sum of x.", status as i32 as u64)), - + status => Err(Error::Unknown( + "Unable to calculate sum of x.", + status as i32 as u64, + )), } } @@ -81,8 +83,10 @@ impl API { cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized), cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch), cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed), - status => Err(Error::Unknown("Unable to calculate axpy (alpha * x + y).", status as i32 as u64)), - + status => Err(Error::Unknown( + "Unable to calculate axpy (alpha * x + y).", + status as i32 as u64, + )), } } @@ -121,8 +125,10 @@ impl API { cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized), cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch), cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed), - status => Err(Error::Unknown("Unable to calculate copy from x to y.", status as i32 as u64)), - + status => Err(Error::Unknown( + "Unable to calculate copy from x to y.", + status as i32 as u64, + )), } } @@ -157,8 +163,10 @@ impl API { cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized), cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch), cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed), - status => Err(Error::Unknown("Unable to calculate dot product of x and y.", status as i32 as u64)), - + status => Err(Error::Unknown( + "Unable to calculate 
dot product of x and y.", + status as i32 as u64, + )), } } @@ -189,15 +197,16 @@ impl API { cublasStatus_t::CUBLAS_STATUS_ALLOC_FAILED => { dbg!("Alloc failed"); Err(Error::AllocFailed) - }, + } cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch), cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed), status => { dbg!("Unknown!"); Err(Error::Unknown( - "Unable to calculate the euclidian norm of x.", status as i32 as u64 + "Unable to calculate the euclidian norm of x.", + status as i32 as u64, )) - }, + } } } @@ -232,8 +241,10 @@ impl API { cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized), cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch), cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed), - status => Err(Error::Unknown("Unable to scale the vector x.", status as i32 as u64)), - + status => Err(Error::Unknown( + "Unable to scale the vector x.", + status as i32 as u64, + )), } } @@ -266,19 +277,21 @@ impl API { cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized), cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch), cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed), - status => Err(Error::Unknown("Unable to swap vector x and y.", status as i32 as u64)), - + status => Err(Error::Unknown( + "Unable to swap vector x and y.", + status as i32 as u64, + )), } } } #[cfg(test)] mod test { - use crate::API; use crate::api::context::Context; use crate::api::enums::PointerMode; - use crate::co::tensor::SharedTensor; use crate::chore::*; + use crate::co::tensor::SharedTensor; + use crate::API; #[test] fn use_cuda_memory_for_asum() { @@ -346,7 +359,6 @@ mod test { } } - let native_y = y.read(native.device()).unwrap(); assert_eq!(&[7f32, 7f32, 7f32, 7f32, 7f32], native_y.as_slice::()); @@ -381,7 +393,6 @@ mod test { } } - let native_y = y.read(native.device()).unwrap(); assert_eq!(&[2f32, 2f32, 2f32, 2f32, 2f32], native_y.as_slice::()); @@ -455,7 +466,6 @@ mod test { } } - let native_result = result.read(native.device()).unwrap(); assert_eq!(&[3f32], native_result.as_slice::()); @@ -489,7 +499,6 @@ mod test { } } - let native_x = x.read(native.device()).unwrap(); assert_eq!(&[5f32, 5f32, 5f32], native_x.as_slice::()); diff --git a/rcublas/cublas/src/api/level3.rs b/rcublas/cublas/src/api/level3.rs index b978dda60..552655797 100644 --- a/rcublas/cublas/src/api/level3.rs +++ b/rcublas/cublas/src/api/level3.rs @@ -1,7 +1,7 @@ -use crate::{API, Error}; use super::Context; use super::Operation; use crate::ffi::*; +use crate::{Error, API}; impl API { /// Performs a general matrix-matrix multiplication. 
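The status-handling `match` blocks below all share one shape: statuses with a dedicated meaning map to their own variant, and everything else is preserved as `Unknown` together with the raw status code instead of being collapsed into a bare string. A condensed, self-contained sketch of that pattern (the `Status` enum here is a stand-in for the generated `cublasStatus_t`, so the example compiles without a CUDA toolchain; `thiserror = "1.0"` is its only dependency):

    // Stand-in for the bindgen-generated status enum.
    #[derive(Debug, Clone, Copy)]
    enum Status {
        Success = 0,
        NotInitialized = 1,
        ExecutionFailed = 13,
        InternalError = 14, // the real enum has many more members
    }

    #[derive(Debug, thiserror::Error)]
    enum BlasError {
        #[error("cuBLAS has not been initialized.")]
        NotInitialized,
        #[error("Execution on the GPU failed.")]
        ExecutionFailed,
        // Both the message and the raw code survive, so "unknown" no longer
        // means "lost".
        #[error("{0} (raw status code: {1})")]
        Unknown(&'static str, u64),
    }

    fn check(status: Status, msg: &'static str) -> Result<(), BlasError> {
        match status {
            Status::Success => Ok(()),
            Status::NotInitialized => Err(BlasError::NotInitialized),
            Status::ExecutionFailed => Err(BlasError::ExecutionFailed),
            // Catch-all: keep the message and the numeric status code.
            status => Err(BlasError::Unknown(msg, status as i32 as u64)),
        }
    }

    fn main() {
        let err = check(Status::InternalError, "Unable to run gemm.").unwrap_err();
        println!("{}", err); // prints: Unable to run gemm. (raw status code: 14)
    }
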
@@ -65,42 +65,31 @@ impl API { ldc: i32, ) -> Result<(), Error> { match cublasSgemm_v2( - handle, - transa, - transb, - m, - n, - k, - alpha, - a, - lda, - b, - ldb, - beta, - c, - ldc, + handle, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, ) { cublasStatus_t::CUBLAS_STATUS_SUCCESS => Ok(()), cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized), - cublasStatus_t::CUBLAS_STATUS_INVALID_VALUE => Err( - Error::InvalidValue("m, n, or k < 0"), - ), + cublasStatus_t::CUBLAS_STATUS_INVALID_VALUE => { + Err(Error::InvalidValue("m, n, or k < 0")) + } cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch), cublasStatus_t::CUBLAS_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed), - status => Err(Error::Unknown("Unable to calculate axpy (alpha * x + y).", status as i32 as u64)), - + status => Err(Error::Unknown( + "Unable to calculate axpy (alpha * x + y).", + status as i32 as u64, + )), } } } #[cfg(test)] mod test { - use crate::ffi::*; - use crate::API; use crate::api::context::Context; use crate::api::enums::PointerMode; - use crate::co::tensor::SharedTensor; use crate::chore::*; + use crate::co::tensor::SharedTensor; + use crate::ffi::*; + use crate::API; #[test] fn use_cuda_memory_for_gemm() { @@ -169,7 +158,8 @@ mod test { beta_addr, c_addr, ldc, - ).unwrap(); + ) + .unwrap(); } } diff --git a/rcublas/cublas/src/api/util.rs b/rcublas/cublas/src/api/util.rs index 40e63d395..65402ddc8 100644 --- a/rcublas/cublas/src/api/util.rs +++ b/rcublas/cublas/src/api/util.rs @@ -1,7 +1,7 @@ -use crate::ffi::*; -use crate::{API, Error}; use super::Context; use super::PointerMode; +use crate::ffi::*; +use crate::{Error, API}; use lazy_static::lazy_static; use log::debug; use std::collections::HashSet; @@ -9,9 +9,7 @@ use std::convert::AsRef; use std::convert::TryFrom; use std::ptr; use std::ptr::NonNull; -use std::sync::{Mutex,Arc}; - - +use std::sync::{Arc, Mutex}; // TODO: // extract the cookie tracking into a separate crate @@ -21,10 +19,10 @@ use std::sync::{Mutex,Arc}; // * cudaMalloc / cudaFree // * cublasContext_new / _destroy // * cudnnContext_new / _destroy -#[derive(Hash,Eq,PartialEq)] +#[derive(Hash, Eq, PartialEq)] struct Cookie(NonNull); -unsafe impl std::marker::Send for Cookie { } +unsafe impl std::marker::Send for Cookie {} impl Cookie { fn as_ptr(&self) -> *mut cublasContext { @@ -34,7 +32,7 @@ impl Cookie { impl TryFrom for Cookie { type Error = Error; - fn try_from(handle: *mut cublasContext) -> std::result::Result { + fn try_from(handle: *mut cublasContext) -> std::result::Result { if let Some(nn) = NonNull::new(handle) { Ok(Cookie(nn)) } else { @@ -44,19 +42,16 @@ impl TryFrom for Cookie { } lazy_static! { - static ref TRACKER: Arc>> = { - Arc::new(Mutex::new(HashSet::with_capacity(3))) - }; + static ref TRACKER: Arc>> = + { Arc::new(Mutex::new(HashSet::with_capacity(3))) }; } - fn track(handle: cublasHandle_t) { let mut guard = TRACKER.as_ref().lock().unwrap(); let _ = guard.insert(Cookie::try_from(handle as *mut cublasContext).unwrap()); debug!("Added handle {:?}, total of {}", handle, guard.len()); } - fn untrack(handle: cublasHandle_t) { let mut guard = TRACKER.as_ref().lock().unwrap(); debug!("Removed handle {:?}, total of {}", handle, guard.len()); @@ -80,7 +75,6 @@ impl API { /// Creating contexts all the time can lead to performance problems. /// Generally one Context per GPU device and configuration is recommended. 
pub fn create() -> Result { - let handle = unsafe { API::ffi_create() }?; track(handle); Ok(Context::from_c(handle)) } @@ -103,9 +97,7 @@ impl API { /// Get CUBLAS Version pub fn get_version(context: &Context) -> Result { - unsafe { - API::ffi_get_version(*context.id_c()) - } + unsafe { API::ffi_get_version(*context.id_c()) } } unsafe fn ffi_get_version(handle: cublasHandle_t) -> Result { @@ -114,23 +106,23 @@ impl API { match cublasGetVersion_v2(handle, version_ptr) { cublasStatus_t::CUBLAS_STATUS_SUCCESS => Ok(version), cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized), - status => Err(Error::Unknown("Other Unknown Error with CUBLAS Get Version", status as i32 as u64)), - + status => Err(Error::Unknown( + "Other Unknown Error with CUBLAS Get Version", + status as i32 as u64, + )), } } /// Retrieve the pointer mode for a given cuBLAS context. pub fn get_pointer_mode(context: &Context) -> Result { - Ok(PointerMode::from_c( - unsafe { API::ffi_get_pointer_mode(*context.id_c()) }?, - )) + Ok(PointerMode::from_c(unsafe { + API::ffi_get_pointer_mode(*context.id_c()) + }?)) } /// Set the pointer mode for a given cuBLAS context. pub fn set_pointer_mode(context: &mut Context, pointer_mode: PointerMode) -> Result<(), Error> { - Ok(unsafe { - API::ffi_set_pointer_mode(*context.id_c(), pointer_mode.as_c()) - }?) + Ok(unsafe { API::ffi_set_pointer_mode(*context.id_c(), pointer_mode.as_c()) }?) } unsafe fn ffi_create() -> Result { @@ -140,8 +132,9 @@ impl API { cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized), cublasStatus_t::CUBLAS_STATUS_ARCH_MISMATCH => Err(Error::ArchMismatch), cublasStatus_t::CUBLAS_STATUS_ALLOC_FAILED => Err(Error::AllocFailed), - status => Err(Error::Unknown("Unable to create the cuBLAS context/resources.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to create the cuBLAS context/resources.", + status as i32 as u64, + )), } } @@ -150,8 +143,9 @@ impl API { match cublasDestroy_v2(handle) { cublasStatus_t::CUBLAS_STATUS_SUCCESS => Ok(()), cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized), - status => Err(Error::Unknown("Unable to destroy the CUDA cuDNN context/resources.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to destroy the cuBLAS context/resources.", + status as i32 as u64, + )), } } @@ -161,8 +155,10 @@ impl API { match cublasGetPointerMode_v2(handle, pointer_mode.as_mut_ptr()) { cublasStatus_t::CUBLAS_STATUS_SUCCESS => Ok(pointer_mode[0]), cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized), - status => Err(Error::Unknown("Unable to get cuBLAS pointer mode.", status as i32 as u64)), - + status => Err(Error::Unknown( + "Unable to get cuBLAS pointer mode.", + status as i32 as u64, + )), } } @@ -173,8 +169,10 @@ impl API { match cublasSetPointerMode_v2(handle, pointer_mode) { cublasStatus_t::CUBLAS_STATUS_SUCCESS => Ok(()), cublasStatus_t::CUBLAS_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized), - status => Err(Error::Unknown("Unable to get cuBLAS pointer mode.", status as i32 as u64)), - + status => Err(Error::Unknown( + "Unable to set cuBLAS pointer mode.", + status as i32 as u64, + )), } } @@ -196,8 +194,8 @@ impl API { #[cfg(test)] mod test { use crate::ffi::cublasPointerMode_t; - use crate::API; use crate::Context; + use crate::API; #[test] fn manual_context_creation() { @@ -231,13 +229,15 @@ mod test { API::ffi_set_pointer_mode( *context.id_c(), cublasPointerMode_t::CUBLAS_POINTER_MODE_DEVICE, - ).unwrap(); + ) + .unwrap();
let mode = API::ffi_get_pointer_mode(*context.id_c()).unwrap(); assert_eq!(cublasPointerMode_t::CUBLAS_POINTER_MODE_DEVICE, mode); API::ffi_set_pointer_mode( *context.id_c(), cublasPointerMode_t::CUBLAS_POINTER_MODE_HOST, - ).unwrap(); + ) + .unwrap(); let mode2 = API::ffi_get_pointer_mode(*context.id_c()).unwrap(); assert_eq!(cublasPointerMode_t::CUBLAS_POINTER_MODE_HOST, mode2); } diff --git a/rcublas/cublas/src/chore.rs b/rcublas/cublas/src/chore.rs index 3b43fae1c..34b8cd9a9 100644 --- a/rcublas/cublas/src/chore.rs +++ b/rcublas/cublas/src/chore.rs @@ -1,38 +1,35 @@ use crate::co::backend::{Backend, IBackend}; -use crate::co::frameworks::{Cuda, Native}; use crate::co::frameworks::native::flatbox::FlatBox; +use crate::co::frameworks::{Cuda, Native}; use crate::co::tensor::SharedTensor; use env_logger; pub fn test_setup() { - - let _ = env_logger::builder().is_test(true).try_init(); + let _ = env_logger::builder().is_test(true).try_init(); } -pub fn test_teardown() { - -} +pub fn test_teardown() {} pub fn get_native_backend() -> Backend { - Backend::::default().unwrap() + Backend::::default().unwrap() } pub fn get_cuda_backend() -> Backend { - Backend::::default().unwrap() + Backend::::default().unwrap() } pub fn write_to_memory(mem: &mut FlatBox, data: &[T]) { - let mem_buffer = mem.as_mut_slice::(); - for (index, datum) in data.iter().enumerate() { - mem_buffer[index] = *datum; - } + let mem_buffer = mem.as_mut_slice::(); + for (index, datum) in data.iter().enumerate() { + mem_buffer[index] = *datum; + } } pub fn filled_tensor(_backend: &B, n: usize, val: T) -> SharedTensor { - let mut x = SharedTensor::::new(&vec![n]); - let values: &[T] = &::std::iter::repeat(val) - .take(x.capacity()) - .collect::>(); - write_to_memory(x.write_only(get_native_backend().device()).unwrap(), values); - x -} \ No newline at end of file + let mut x = SharedTensor::::new(&vec![n]); + let values: &[T] = &::std::iter::repeat(val) + .take(x.capacity()) + .collect::>(); + write_to_memory(x.write_only(get_native_backend().device()).unwrap(), values); + x +} diff --git a/rcublas/cublas/src/error.rs b/rcublas/cublas/src/error.rs index 5fde9f5a1..d26761f69 100644 --- a/rcublas/cublas/src/error.rs +++ b/rcublas/cublas/src/error.rs @@ -19,7 +19,9 @@ pub enum Error { #[error("Invalid value: {0}")] InvalidValue(&'static str), /// Failure with the hardware architecture. - #[error("cuBLAS only supports devices with compute capabilities greater than or equal to 1.3.")] + #[error( + "cuBLAS only supports devices with compute capabilities greater than or equal to 1.3." + )] ArchMismatch, /// Failure with memory access or internal error/bug. 
#[error("There was an error accessing GPU memory.")] diff --git a/rcublas/cublas/src/lib.rs b/rcublas/cublas/src/lib.rs index ab2a286d3..df6654412 100644 --- a/rcublas/cublas/src/lib.rs +++ b/rcublas/cublas/src/lib.rs @@ -16,4 +16,4 @@ pub mod api; pub mod error; #[cfg(test)] -pub(crate) mod chore; \ No newline at end of file +pub(crate) mod chore; diff --git a/rcudnn/cudnn-sys/build.rs b/rcudnn/cudnn-sys/build.rs index 3d145572f..eef701825 100644 --- a/rcudnn/cudnn-sys/build.rs +++ b/rcudnn/cudnn-sys/build.rs @@ -55,14 +55,13 @@ fn main() { println!("cargo:rerun-if-changed=build.rs"); - #[cfg(feature="generate")] + #[cfg(feature = "generate")] { println!("cargo:warning=Running bindgen(cudnn-sys), make sure to have all required host libs installed!"); use std::path::PathBuf; - let include_dir = include_dir - .unwrap_or_else(|| String::from("/usr/include/cuda")); + let include_dir = include_dir.unwrap_or_else(|| String::from("/usr/include/cuda")); let bindings = bindgen::Builder::default() .rust_target(bindgen::LATEST_STABLE_RUST) diff --git a/rcudnn/cudnn-sys/src/generated.rs b/rcudnn/cudnn-sys/src/generated.rs index d28b6df8c..a5741c52f 100644 --- a/rcudnn/cudnn-sys/src/generated.rs +++ b/rcudnn/cudnn-sys/src/generated.rs @@ -1,12 +1,10 @@ /* automatically generated by rust-bindgen */ - //! Defines the FFI for CUDA cuDNN. //! #![allow(non_camel_case_types)] #![allow(non_snake_case)] #![allow(non_upper_case_globals)] - pub const CUDNN_MAJOR: u32 = 8; pub const CUDNN_MINOR: u32 = 1; diff --git a/rcudnn/cudnn-sys/src/lib.rs b/rcudnn/cudnn-sys/src/lib.rs index 21864996b..0a973975f 100644 --- a/rcudnn/cudnn-sys/src/lib.rs +++ b/rcudnn/cudnn-sys/src/lib.rs @@ -2,7 +2,6 @@ mod generated; pub use crate::generated::*; - impl Default for cudnnConvolutionFwdAlgoPerf_t { fn default() -> Self { Self { diff --git a/rcudnn/cudnn/benches/cudnn_overhead.rs b/rcudnn/cudnn/benches/cudnn_overhead.rs index ebaaf2b9c..b12420696 100644 --- a/rcudnn/cudnn/benches/cudnn_overhead.rs +++ b/rcudnn/cudnn/benches/cudnn_overhead.rs @@ -4,7 +4,6 @@ extern crate coaster as co; extern crate rcudnn; extern crate test; - #[cfg(test)] mod cudnn_spec { diff --git a/rcudnn/cudnn/src/api/activation.rs b/rcudnn/cudnn/src/api/activation.rs index b2b6fcfbf..3c9d86125 100644 --- a/rcudnn/cudnn/src/api/activation.rs +++ b/rcudnn/cudnn/src/api/activation.rs @@ -129,8 +129,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => { Err(Error::AllocFailed("The resources could not be allocated.")) } - status => Err(Error::Unknown("Unable to create generic CUDA cuDNN Activation Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to create generic CUDA cuDNN Activation Descriptor.", + status as i32 as u64, )), } } @@ -140,8 +141,9 @@ impl API { ) -> Result<(), Error> { match cudnnDestroyActivationDescriptor(desc) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), - status => Err(Error::Unknown("Unable to destroy CUDA cuDNN Activation Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to destroy CUDA cuDNN Activation Descriptor.", + status as i32 as u64, )), } } @@ -157,8 +159,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam( "`window_dim_a`, `padding_a` or `stride_a` has negative element or invalid `mode`.", )), // FIXME - status => Err(Error::Unknown("Unable to set CUDA cuDNN Activation Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to set CUDA cuDNN Activation Descriptor.", + status as i32 as u64, )), } } diff --git 
a/rcudnn/cudnn/src/api/convolution.rs b/rcudnn/cudnn/src/api/convolution.rs index 56c3f4cb8..d1bf9931a 100644 --- a/rcudnn/cudnn/src/api/convolution.rs +++ b/rcudnn/cudnn/src/api/convolution.rs @@ -42,8 +42,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => { Err(Error::AllocFailed("The resources could not be allocated.")) } - status => Err(Error::Unknown("Unable to create generic CUDA cuDNN Filter Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to create generic CUDA cuDNN Filter Descriptor.", + status as i32 as u64, )), } } @@ -51,8 +52,9 @@ impl API { unsafe fn ffi_destroy_filter_descriptor(desc: cudnnFilterDescriptor_t) -> Result<(), Error> { match cudnnDestroyFilterDescriptor(desc) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), - status => Err(Error::Unknown("Unable to destroy CUDA cuDNN Filter Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to destroy CUDA cuDNN Filter Descriptor.", + status as i32 as u64, )), } } @@ -72,8 +74,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => { Err(Error::NotSupported("`nb_dims` exceeds CUDNN_DIM_MAX.")) } - status => Err(Error::Unknown("Unable to set CUDA cuDNN Filter Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to set CUDA cuDNN Filter Descriptor.", + status as i32 as u64, )), } } @@ -485,8 +488,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => { Err(Error::AllocFailed("The resources could not be allocated.")) } - status => Err(Error::Unknown("Unable to create generic CUDA cuDNN Convolution Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to create generic CUDA cuDNN Convolution Descriptor.", + status as i32 as u64, )), } } @@ -496,8 +500,9 @@ impl API { ) -> Result<(), Error> { match cudnnDestroyConvolutionDescriptor(desc) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), - status => Err(Error::Unknown("Unable to destroy CUDA cuDNN Convolution Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to destroy CUDA cuDNN Convolution Descriptor.", + status as i32 as u64, )), } } diff --git a/rcudnn/cudnn/src/api/cuda.rs b/rcudnn/cudnn/src/api/cuda.rs index 9980a5e50..da0c0321c 100644 --- a/rcudnn/cudnn/src/api/cuda.rs +++ b/rcudnn/cudnn/src/api/cuda.rs @@ -1,8 +1,8 @@ //! Provides utility functionality for the CUDA cuDNN API. use crate::ffi::*; -use std::ptr; use crate::{Error, API}; +use std::ptr; impl API { /// Initialize the CUDA cuDNN API with needed context and resources. 
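Since allocation and free both report typed errors now (with the raw `cudaError_t` code preserved in `Unknown`), the natural caller-side pattern is an RAII guard that pairs them. A minimal sketch, assuming safe wrappers over the FFI below named `cuda_allocate_device_memory`/`free_cuda_device_memory` (illustrative names, not guaranteed by this patch):

    use rcudnn::{Error, API};
    use std::os::raw::c_void;

    // RAII guard: the device buffer is freed exactly once, even when a `?`
    // returns early from the function that owns it.
    struct DeviceBuffer {
        ptr: *mut c_void,
    }

    impl DeviceBuffer {
        fn new(bytes: usize) -> Result<Self, Error> {
            // Hypothetical safe wrapper over the `cudaMalloc` call shown below.
            let ptr = API::cuda_allocate_device_memory(bytes)?;
            Ok(DeviceBuffer { ptr })
        }
    }

    impl Drop for DeviceBuffer {
        fn drop(&mut self) {
            // Errors cannot escape `drop`; ignoring the result here is a
            // deliberate trade-off (logging it would also be reasonable).
            let _ = API::free_cuda_device_memory(self.ptr);
        }
    }

    fn scratch(bytes: usize) -> Result<(), Error> {
        let _buf = DeviceBuffer::new(bytes)?; // freed when it goes out of scope
        Ok(())
    }
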
@@ -28,8 +28,9 @@ impl API { cudaError_t::cudaErrorMemoryAllocation => { Err(Error::AllocFailed("Unable to allocate CUDA device memory.")) } - status => Err(Error::Unknown("Unable to allocate CUDA device memory for unknown reasons.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to allocate CUDA device memory for unknown reasons.", + status as i32 as u64, )), } } @@ -38,14 +39,16 @@ impl API { match cudaFree(ptr) { cudaError_t::cudaSuccess => Ok(()), // TODO, more error enums sigh - cudaError_t::cudaErrorInvalidDevicePointer => { - Err(Error::InvalidValue("Unable to free the CUDA device memory due to invalid device pointer.")) - } - cudaError_t::cudaErrorInitializationError => { - Err(Error::NotInitialized("CUDA Driver/Runtime API not initialized.")) - } - status => Err(Error::Unknown("Unable to free the CUDA device memory.", status as i32 as u64)), - + cudaError_t::cudaErrorInvalidDevicePointer => Err(Error::InvalidValue( + "Unable to free the CUDA device memory due to invalid device pointer.", + )), + cudaError_t::cudaErrorInitializationError => Err(Error::NotInitialized( + "CUDA Driver/Runtime API not initialized.", + )), + status => Err(Error::Unknown( + "Unable to free the CUDA device memory.", + status as i32 as u64, + )), } } } diff --git a/rcudnn/cudnn/src/api/dropout.rs b/rcudnn/cudnn/src/api/dropout.rs index bbe29b8a2..87e891854 100644 --- a/rcudnn/cudnn/src/api/dropout.rs +++ b/rcudnn/cudnn/src/api/dropout.rs @@ -109,8 +109,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => { Err(Error::AllocFailed("The resources could not be allocated")) } - status => Err(Error::Unknown("Unable create generic CUDA cuDNN Dropout Descriptor", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable create generic CUDA cuDNN Dropout Descriptor", + status as i32 as u64, )), } } @@ -119,8 +120,9 @@ impl API { ) -> Result<(), Error> { match cudnnDestroyDropoutDescriptor(dropout_desc) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), - status => Err(Error::Unknown("Unable to destroy CUDA cuDNN Dropout Descriptor", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to destroy CUDA cuDNN Dropout Descriptor", + status as i32 as u64, )), } } @@ -128,8 +130,9 @@ impl API { let mut size_in_bytes: usize = 0; match cudnnDropoutGetStatesSize(handle, &mut size_in_bytes) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(size_in_bytes), - status => Err(Error::Unknown("Unable to get CUDA cuDNN Dropout Descriptor states size", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to get CUDA cuDNN Dropout Descriptor states size", + status as i32 as u64, )), } } @@ -139,8 +142,9 @@ impl API { let mut size_in_bytes: usize = 0; match cudnnDropoutGetReserveSpaceSize(xdesc, &mut size_in_bytes) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(size_in_bytes), - status => Err(Error::Unknown("Unable to get CUDA cuDNN Dropout Descriptor reserved space size", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to get CUDA cuDNN Dropout Descriptor reserved space size", + status as i32 as u64, )), } } @@ -167,8 +171,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => Err(Error::ExecutionFailed( "The function failed to launch on the GPU", )), - status => Err(Error::Unknown("Unable to set CUDA cuDNN Dropout Descriptor", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to set CUDA cuDNN Dropout Descriptor", + status as i32 as u64, )), } } diff --git a/rcudnn/cudnn/src/api/normalization.rs b/rcudnn/cudnn/src/api/normalization.rs index 
0e59ad58b..309a202cb 100644 --- a/rcudnn/cudnn/src/api/normalization.rs +++ b/rcudnn/cudnn/src/api/normalization.rs @@ -142,8 +142,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => { Err(Error::AllocFailed("The resources could not be allocated.")) } - status => Err(Error::Unknown("Unable to create generic CUDA cuDNN LRN Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to create generic CUDA cuDNN LRN Descriptor.", + status as i32 as u64, )), } } @@ -151,8 +152,9 @@ impl API { unsafe fn ffi_destroy_lrn_descriptor(desc: cudnnLRNDescriptor_t) -> Result<(), Error> { match cudnnDestroyLRNDescriptor(desc) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), - status => Err(Error::Unknown("Unable to destroy CUDA cuDNN LRN Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to destroy CUDA cuDNN LRN Descriptor.", + status as i32 as u64, )), } } @@ -169,8 +171,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam( "One of the input parameters was out of range.", )), - status => Err(Error::Unknown("Unable to set CUDA cuDNN Pooling Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to set CUDA cuDNN Pooling Descriptor.", + status as i32 as u64, )), } } diff --git a/rcudnn/cudnn/src/api/pooling.rs b/rcudnn/cudnn/src/api/pooling.rs index 24cfb8c5f..98683a79a 100644 --- a/rcudnn/cudnn/src/api/pooling.rs +++ b/rcudnn/cudnn/src/api/pooling.rs @@ -202,8 +202,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => { Err(Error::AllocFailed("The resources could not be allocated.")) } - status => Err(Error::Unknown("Unable to create generic CUDA cuDNN Pooling Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to create generic CUDA cuDNN Pooling Descriptor.", + status as i32 as u64, )), } } @@ -211,8 +212,9 @@ impl API { unsafe fn ffi_destroy_pooling_descriptor(desc: cudnnPoolingDescriptor_t) -> Result<(), Error> { match cudnnDestroyPoolingDescriptor(desc) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), - status => Err(Error::Unknown("Unable to destroy CUDA cuDNN Pooling Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to destroy CUDA cuDNN Pooling Descriptor.", + status as i32 as u64, )), } } @@ -239,8 +241,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam( "`window_dim_a`, `padding_a` or `stride_a` has negative element or invalid `mode`.", )), - status => Err(Error::Unknown("Unable to set CUDA cuDNN Pooling Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to set CUDA cuDNN Pooling Descriptor.", + status as i32 as u64, )), } } @@ -270,8 +273,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam( "`window_dim_a`, `padding_a` or `stride_a` has negative element or invalid `mode`.", )), - status => Err(Error::Unknown("Unable to get CUDA cuDNN Pooling Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to get CUDA cuDNN Pooling Descriptor.", + status as i32 as u64, )), } } @@ -303,8 +307,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam( "`window_dim_a`, `padding_a` or `stride_a` has negative element or invalid `mode`.", )), - status => Err(Error::Unknown("Unable to set CUDA cuDNN Pooling Descriptor 2D.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to set CUDA cuDNN Pooling Descriptor 2D.", + status as i32 as u64, )), } } @@ -336,8 +341,9 @@ impl API { 
cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam( "`window_dim_a`, `padding_a` or `stride_a` has negative element or invalid `mode`.", )), - status => Err(Error::Unknown("Unable to get CUDA cuDNN Pooling Descriptor 2D.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to get CUDA cuDNN Pooling Descriptor 2D.", + status as i32 as u64, )), } } diff --git a/rcudnn/cudnn/src/api/rnn.rs b/rcudnn/cudnn/src/api/rnn.rs index 61b4eb599..7ddd9fee6 100644 --- a/rcudnn/cudnn/src/api/rnn.rs +++ b/rcudnn/cudnn/src/api/rnn.rs @@ -3,8 +3,8 @@ //! Includes the RNN functionality. use crate::ffi::*; -use crate::{Error, API}; use crate::utils::DataType; +use crate::{Error, API}; // Workspace impl API { @@ -63,22 +63,20 @@ impl API { handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, seq_length: ::libc::c_int, - x_desc: Vec<cudnnTensorDescriptor_t> + x_desc: Vec<cudnnTensorDescriptor_t>, ) -> Result<usize, Error> { unsafe { - API::ffi_get_rnn_training_reserve_size( - handle, rnn_desc, seq_length, x_desc.as_slice() - ) + API::ffi_get_rnn_training_reserve_size(handle, rnn_desc, seq_length, x_desc.as_slice()) } } unsafe fn ffi_get_rnn_training_reserve_size( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, seq_length: ::libc::c_int, - x_desc: &[cudnnTensorDescriptor_t] + x_desc: &[cudnnTensorDescriptor_t], ) -> Result<::libc::size_t, Error> { let mut size: ::libc::size_t = 0; - let size_ptr : *mut ::libc::size_t = &mut size; + let size_ptr: *mut ::libc::size_t = &mut size; match cudnnGetRNNTrainingReserveSize(handle, rnn_desc,seq_length, x_desc.as_ptr(), size_ptr) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(size), cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("At least one of the following conditions are met: One of the parameters `handle`, `x_desc`, `rnn_desc` is NULL. The tensors in `x_desc` are not of the same data type.
The batch size of the tensors `x_desc` are not decreasing or staying constant.")), @@ -99,22 +97,17 @@ impl API { handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, x_desc: cudnnTensorDescriptor_t, - data_type: DataType + data_type: DataType, ) -> Result<usize, Error> { unsafe { - API::ffi_get_rnn_params_size( - handle, - rnn_desc, - x_desc, - API::cudnn_data_type(data_type) - ) + API::ffi_get_rnn_params_size(handle, rnn_desc, x_desc, API::cudnn_data_type(data_type)) } } unsafe fn ffi_get_rnn_params_size( handle: cudnnHandle_t, rnn_desc: cudnnRNNDescriptor_t, x_desc: cudnnTensorDescriptor_t, - data_type: cudnnDataType_t + data_type: cudnnDataType_t, ) -> Result<::libc::size_t, Error> { let mut size: ::libc::size_t = 0; let size_ptr: *mut ::libc::size_t = &mut size; @@ -141,8 +134,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => { Err(Error::AllocFailed("The resources could not be allocated")) } - status => Err(Error::Unknown("Unable create generic CUDA cuDNN RNN Descriptor", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to create generic CUDA cuDNN RNN Descriptor", + status as i32 as u64, )), } } @@ -150,16 +144,16 @@ impl API { /// cudnnCreateRNNDataDescriptor() /// https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnCreateRNNDataDescriptor pub fn create_rnn_data_descriptor() -> Result<cudnnRNNDataDescriptor_t, Error> { - unsafe { - API::ffi_create_rnn_data_descriptor() - } + unsafe { API::ffi_create_rnn_data_descriptor() } } unsafe fn ffi_create_rnn_data_descriptor() -> Result<cudnnRNNDataDescriptor_t, Error> { let mut rnn_data_descriptor: cudnnRNNDataDescriptor_t = ::std::ptr::null_mut(); match cudnnCreateRNNDataDescriptor(&mut rnn_data_descriptor) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(rnn_data_descriptor), - status => Err(Error::Unknown("Unable to create Data Descriptor", status as i32 as u64)), - + status => Err(Error::Unknown( + "Unable to create RNN Data Descriptor", + status as i32 as u64, + )), } } @@ -172,8 +166,9 @@ impl API { unsafe fn ffi_destroy_rnn_descriptor(rnn_desc: cudnnRNNDescriptor_t) -> Result<(), Error> { match cudnnDestroyRNNDescriptor(rnn_desc) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), - status => Err(Error::Unknown("Unable to destroy CUDA cuDNN Dropout Descriptor", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to destroy CUDA cuDNN RNN Descriptor", + status as i32 as u64, )), } } @@ -192,10 +187,10 @@ impl API { algorithm: cudnnRNNAlgo_t, data_type: DataType, ) -> Result<(), Error> { - let data_type = match data_type { + let data_type = match data_type { DataType::Float => cudnnDataType_t::CUDNN_DATA_FLOAT, DataType::Double => cudnnDataType_t::CUDNN_DATA_DOUBLE, - DataType::Half => cudnnDataType_t::CUDNN_DATA_HALF + DataType::Half => cudnnDataType_t::CUDNN_DATA_HALF, }; unsafe { @@ -241,8 +236,10 @@ impl API { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("FIXME RNN")), cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("FIXME RNN")), - status => Err(Error::Unknown("Unable to set CUDA cuDNN RNN Descriptor.", status as i32 as u64)), - + status => Err(Error::Unknown( + "Unable to set CUDA cuDNN RNN Descriptor.", + status as i32 as u64, + )), } } @@ -251,22 +248,25 @@ impl API { /// /// [1]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnSetRNNMatrixMathType /// [2]: https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#tensor-ops-rnn-functions-pre-req - pub fn set_rnn_matrix_math_type(rnn_desc : cudnnRNNDescriptor_t, math_type: cudnnMathType_t) ->
Result<(), Error> { - unsafe{ - API::ffi_set_rnn_matrix_math_type(rnn_desc, math_type) - } + pub fn set_rnn_matrix_math_type( + rnn_desc: cudnnRNNDescriptor_t, + math_type: cudnnMathType_t, + ) -> Result<(), Error> { + unsafe { API::ffi_set_rnn_matrix_math_type(rnn_desc, math_type) } } - unsafe fn ffi_set_rnn_matrix_math_type(rnn_desc: cudnnRNNDescriptor_t, math_type: cudnnMathType_t) -> Result<(), Error> { - match cudnnSetRNNMatrixMathType( - rnn_desc, - math_type - ) { - cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), - cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("FIXME RNN")), - cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("FIXME RNN")), - status => Err(Error::Unknown("Unable to set CUDA cuDNN RNN Matrix Math Type.", status as i32 as u64)), - - } + unsafe fn ffi_set_rnn_matrix_math_type( + rnn_desc: cudnnRNNDescriptor_t, + math_type: cudnnMathType_t, + ) -> Result<(), Error> { + match cudnnSetRNNMatrixMathType(rnn_desc, math_type) { + cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), + cudnnStatus_t::CUDNN_STATUS_BAD_PARAM => Err(Error::BadParam("FIXME RNN")), + cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported("FIXME RNN")), + status => Err(Error::Unknown( + "Unable to set CUDA cuDNN RNN Matrix Math Type.", + status as i32 as u64, + )), + } } /// Set RNN Padding Model [cudnnSetRNNPaddingMode][1] @@ -277,12 +277,16 @@ impl API { /// By default, the padded RNN input/output is not enabled. /// /// [1]: https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnSetRNNPaddingMode - pub fn set_rnn_padding_mode(rnn_desc: cudnnRNNDescriptor_t, padding_mode: cudnnRNNPaddingMode_t) -> Result<(), Error> { - unsafe { - API::ffi_set_rnn_padding_mode(rnn_desc, padding_mode) - } + pub fn set_rnn_padding_mode( + rnn_desc: cudnnRNNDescriptor_t, + padding_mode: cudnnRNNPaddingMode_t, + ) -> Result<(), Error> { + unsafe { API::ffi_set_rnn_padding_mode(rnn_desc, padding_mode) } } - unsafe fn ffi_set_rnn_padding_mode(rnn_desc: cudnnRNNDescriptor_t, padding_mode: cudnnRNNPaddingMode_t) -> Result<(), Error> { + unsafe fn ffi_set_rnn_padding_mode( + rnn_desc: cudnnRNNDescriptor_t, + padding_mode: cudnnRNNPaddingMode_t, + ) -> Result<(), Error> { match cudnnSetRNNPaddingMode( rnn_desc, padding_mode, @@ -492,30 +496,29 @@ impl API { work_space: *mut ::libc::c_void, work_size_in_bytes: ::libc::size_t, ) -> Result<(), Error> { - unsafe { - API::ffi_rnn_forward_inference( - handle, - rnn_desc, - seq_length, - x_desc, - x, - hx_desc, - hx, - cx_desc, - cx, - w_desc, - w, - y_desc, - y, - hy_desc, - hy, - cy_desc, - cy, - work_space, - work_size_in_bytes, - - ) - } + unsafe { + API::ffi_rnn_forward_inference( + handle, + rnn_desc, + seq_length, + x_desc, + x, + hx_desc, + hx, + cx_desc, + cx, + w_desc, + w, + y_desc, + y, + hy_desc, + hy, + cy_desc, + cy, + work_space, + work_size_in_bytes, + ) + } } #[allow(clippy::too_many_arguments)] unsafe fn ffi_rnn_forward_inference( @@ -752,27 +755,27 @@ impl API { /// the weight gradients calculated will be added to those already existing in `dw`. /// Workspace is required for intermediate storage. /// The data in reserveSpace must have previously been generated by cudnnRNNBackwardData(). - /// - /// # Arguments - /// `handle` Handle to a previously created [cudNN context][0] - /// `rnn_desc` A previously initialised [RNN descriptor][1] - /// `seq_length` Number of iterations for the RNN to unroll over. - /// `x_desc` Array of packed tensor descriptors. 
- /// `x` Data pointer for Input - /// `hx_desc` Fully packed tensor descriptor for the initial hidden state of the RNN. - /// `hx` Data pointer for initial hidden state - if null will initialize state to zero. - /// `y_desc` Array of packed [tensor descriptors][1] describing the *output* from each recurrent - /// iteration. - /// `y` Data pointer to GPU memory for output at each iteration - /// `dw_desc` Handle to previously initialized filter descriptor for the gradient of the - /// weights. - /// `dw` Data pointer to GPU memory for the descriptor of the gradient of the weights. - /// `workspace` Data pointer to GPU memory to be used as a workspace for this call - /// `workspace_in_bytes` Size in bytes of the provided workspace - /// `reserve_space` Data pointer for GPU memory to be used as a reserve space for this call - /// `reserve_space_in_bytes` Size in bytes for `reserve_space` - /// [0]:https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnHandle_t - /// [1]:https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnRNNDescriptor_t + /// + /// # Arguments + /// `handle` Handle to a previously created [cuDNN context][0] + /// `rnn_desc` A previously initialised [RNN descriptor][1] + /// `seq_length` Number of iterations for the RNN to unroll over. + /// `x_desc` Array of packed tensor descriptors. + /// `x` Data pointer for Input + /// `hx_desc` Fully packed tensor descriptor for the initial hidden state of the RNN. + /// `hx` Data pointer for initial hidden state - if null will initialize state to zero. + /// `y_desc` Array of packed [tensor descriptors][1] describing the *output* from each recurrent + /// iteration. + /// `y` Data pointer to GPU memory for output at each iteration + /// `dw_desc` Handle to previously initialized filter descriptor for the gradient of the + /// weights. + /// `dw` Data pointer to GPU memory for the descriptor of the gradient of the weights.
+ /// `workspace` Data pointer to GPU memory to be used as a workspace for this call + /// `workspace_in_bytes` Size in bytes of the provided workspace + /// `reserve_space` Data pointer for GPU memory to be used as a reserve space for this call + /// `reserve_space_in_bytes` Size in bytes for `reserve_space` + /// [0]:https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnHandle_t + /// [1]:https://docs.nvidia.com/deeplearning/sdk/cudnn-api/index.html#cudnnRNNDescriptor_t #[allow(clippy::too_many_arguments)] pub fn rnn_backward_weights( handle: cudnnHandle_t, diff --git a/rcudnn/cudnn/src/api/tensor.rs b/rcudnn/cudnn/src/api/tensor.rs index d5376aa9b..e377310ad 100644 --- a/rcudnn/cudnn/src/api/tensor.rs +++ b/rcudnn/cudnn/src/api/tensor.rs @@ -134,8 +134,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_ALLOC_FAILED => { Err(Error::AllocFailed("The resources could not be allocated.")) } - status => Err(Error::Unknown("Unable to create generic CUDA cuDNN Tensor Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to create generic CUDA cuDNN Tensor Descriptor.", + status as i32 as u64, )), } } @@ -145,8 +146,9 @@ impl API { ) -> Result<(), Error> { match cudnnDestroyTensorDescriptor(tensor_desc) { cudnnStatus_t::CUDNN_STATUS_SUCCESS => Ok(()), - status => Err(Error::Unknown("Unable to destroy CUDA cuDNN Tensor Descriptor context.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to destroy CUDA cuDNN Tensor Descriptor context.", + status as i32 as u64, )), } } @@ -166,8 +168,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported( "`nb_dims` exceeds CUDNN_DIM_MAX or 2 Giga-elements.", )), - status => Err(Error::Unknown("Unable to set CUDA cuDNN Tensor Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to set CUDA cuDNN Tensor Descriptor.", + status as i32 as u64, )), } } @@ -195,8 +198,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_NOT_SUPPORTED => Err(Error::NotSupported( "`nb_dims` exceeds CUDNN_DIM_MAX or 2 Giga-elements.", )), - status => Err(Error::Unknown("Unable to set CUDA cuDNN Tensor Descriptor.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to set CUDA cuDNN Tensor Descriptor.", + status as i32 as u64, )), } } @@ -252,8 +256,10 @@ impl API { cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => { Err(Error::ExecutionFailed("Execution failed to launch on GPU.")) } - status => Err(Error::Unknown("Unable to set CUDA cuDNN Tensor.", status as i32 as u64)), - + status => Err(Error::Unknown( + "Unable to set CUDA cuDNN Tensor.", + status as i32 as u64, + )), } } @@ -271,8 +277,10 @@ impl API { cudnnStatus_t::CUDNN_STATUS_EXECUTION_FAILED => { Err(Error::ExecutionFailed("Execution failed to launch on GPU.")) } - status => Err(Error::Unknown("Unable to scale CUDA cuDNN Tensor.", status as i32 as u64)), - + status => Err(Error::Unknown( + "Unable to scale CUDA cuDNN Tensor.", + status as i32 as u64, + )), } } } diff --git a/rcudnn/cudnn/src/api/utils.rs b/rcudnn/cudnn/src/api/utils.rs index eecad34b3..af0f34da1 100644 --- a/rcudnn/cudnn/src/api/utils.rs +++ b/rcudnn/cudnn/src/api/utils.rs @@ -1,9 +1,9 @@ //! Provides utility functionality for the CUDA cuDNN API. use crate::ffi::*; -use std::ptr; -use crate::{Error, API}; use crate::utils::DataType; +use crate::{Error, API}; +use std::ptr; impl API { /// Initialize the CUDA cuDNN API with needed context and resources. 
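Every rewritten match arm above funnels an unrecognized FFI status through the same `status as i32 as u64` double cast. For reference, a minimal, self-contained sketch of what that conversion does — `FakeStatus` is a hypothetical stand-in; the real `cudnnStatus_t` / `cudaError_t` enums come from the -sys crates and are not shown in these hunks:

    // Hypothetical stand-in for an FFI status enum such as cudnnStatus_t.
    #[repr(i32)]
    #[derive(Copy, Clone, Debug)]
    enum FakeStatus {
        Success = 0,
        NotInitialized = 1,
        Unknown = 4,
    }

    fn to_code(status: FakeStatus) -> u64 {
        // The first cast reads the i32 discriminant; the second widens it to
        // u64. A negative discriminant would sign-extend (-1i32 as u64 ==
        // u64::MAX), but CUDA/cuDNN status codes are non-negative, so the
        // round-trip is lossless for these APIs.
        status as i32 as u64
    }

    fn main() {
        assert_eq!(to_code(FakeStatus::Success), 0);
        assert_eq!(to_code(FakeStatus::NotInitialized), 1);
        assert_eq!(to_code(FakeStatus::Unknown), 4);
    }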
@@ -49,8 +49,9 @@ impl API { cudnnStatus_t::CUDNN_STATUS_NOT_INITIALIZED => Err(Error::NotInitialized( "CUDA Driver/Runtime API not initialized.", )), - status => Err(Error::Unknown("Unable to destroy the CUDA cuDNN context/resources.", status as i32 as u64 - + status => Err(Error::Unknown( + "Unable to destroy the CUDA cuDNN context/resources.", + status as i32 as u64, )), } } @@ -62,7 +63,7 @@ impl API { match data_type { DataType::Float => cudnnDataType_t::CUDNN_DATA_FLOAT, DataType::Double => cudnnDataType_t::CUDNN_DATA_DOUBLE, - DataType::Half => cudnnDataType_t::CUDNN_DATA_HALF + DataType::Half => cudnnDataType_t::CUDNN_DATA_HALF, } } } diff --git a/rcudnn/cudnn/src/cuda.rs b/rcudnn/cudnn/src/cuda.rs index 213e6d381..636b7037f 100644 --- a/rcudnn/cudnn/src/cuda.rs +++ b/rcudnn/cudnn/src/cuda.rs @@ -18,18 +18,12 @@ impl CudaDeviceMemory { /// Saw fun X Y Z pub fn new(size: usize) -> Result<CudaDeviceMemory, Error> { let ptr = API::cuda_allocate_device_memory(size)?; - Ok(CudaDeviceMemory { - ptr, - size, - }) + Ok(CudaDeviceMemory { ptr, size }) } /// Initializes a new CUDA Device Memory from its C type. pub fn from_c(ptr: *mut ::libc::c_void, size: usize) -> CudaDeviceMemory { - CudaDeviceMemory { - ptr, - size, - } + CudaDeviceMemory { ptr, size } } /// Returns the CUDA Device Memory ptr as its C type. diff --git a/rcudnn/cudnn/src/cudnn.rs b/rcudnn/cudnn/src/cudnn.rs index f794e2aa4..2630a90cc 100644 --- a/rcudnn/cudnn/src/cudnn.rs +++ b/rcudnn/cudnn/src/cudnn.rs @@ -6,15 +6,15 @@ use super::utils::{ ActivationConfig, ConvolutionConfig, DataTypeInfo, DropoutConfig, NormalizationConfig, - PoolingConfig, ScalParams, RnnConfig + PoolingConfig, RnnConfig, ScalParams, }; use super::*; use crate::cuda::CudaDeviceMemory; use num::traits::Float; use std::mem::transmute_copy; -use utils::DataType; use tensor_descriptor::tensor_vec_id_c; +use utils::DataType; #[derive(Debug, Clone)] /// Provides a the high-level interface to CUDA's cuDNN.
@@ -176,7 +176,8 @@ impl Cudnn { pub fn init_dropout(&self, probability: f32, seed: u64) -> Result<DropoutConfig, Error> { let reserve_required: usize = API::dropout_get_states_size(*self.id_c())?; let reserve = CudaDeviceMemory::new(reserve_required)?; - let dropout = DropoutDescriptor::new(&self, probability, seed, *reserve.id_c(), reserve.size())?; + let dropout = + DropoutDescriptor::new(&self, probability, seed, *reserve.id_c(), reserve.size())?; Ok(DropoutConfig::new(dropout, reserve)) } @@ -195,35 +196,31 @@ impl Cudnn { network_mode: cudnnRNNMode_t, algorithm: cudnnRNNAlgo_t, data_type: DataType, - math_type: cudnnMathType_t + math_type: cudnnMathType_t, ) -> Result<RnnConfig, Error> { - - let data_type = match data_type { + let data_type = match data_type { DataType::Float => cudnnDataType_t::CUDNN_DATA_FLOAT, DataType::Double => cudnnDataType_t::CUDNN_DATA_DOUBLE, - DataType::Half => cudnnDataType_t::CUDNN_DATA_HALF + DataType::Half => cudnnDataType_t::CUDNN_DATA_HALF, }; - API::set_rnn_matrix_math_type( - *rnn_desc.id_c(), - math_type - )?; + API::set_rnn_matrix_math_type(*rnn_desc.id_c(), math_type)?; - let workspace_size : usize = API::get_rnn_workspace_size( + let workspace_size: usize = API::get_rnn_workspace_size( *self.id_c(), *rnn_desc.id_c(), seq_length, - tensor_vec_id_c(x_desc) + tensor_vec_id_c(x_desc), )?; - let training_reserve_size : usize = API::get_rnn_training_reserve_size( + let training_reserve_size: usize = API::get_rnn_training_reserve_size( *self.id_c(), *rnn_desc.id_c(), seq_length, - tensor_vec_id_c(x_desc) + tensor_vec_id_c(x_desc), )?; - let training_reserve : CudaDeviceMemory = CudaDeviceMemory::new(training_reserve_size)?; + let training_reserve: CudaDeviceMemory = CudaDeviceMemory::new(training_reserve_size)?; Ok(RnnConfig::new( rnn_desc, @@ -238,7 +235,7 @@ impl Cudnn { data_type, workspace_size, training_reserve_size, - training_reserve + training_reserve, )) } @@ -264,9 +261,11 @@ impl Cudnn { cell_output_desc: &TensorDescriptor, cell_output: *mut ::libc::c_void, workspace: *mut ::libc::c_void, - reserve_data: *mut ::libc::c_void + reserve_data: *mut ::libc::c_void, ) -> Result<(), Error> - where T: Float + DataTypeInfo { + where + T: Float + DataTypeInfo, + { API::rnn_forward_training( *self.id_c(), *(rnn_config.rnn_desc().id_c()), @@ -288,7 +287,7 @@ impl Cudnn { workspace, rnn_config.rnn_workspace_size(), reserve_data, - rnn_config.training_reserve_size() + rnn_config.training_reserve_size(), ) } @@ -322,7 +321,9 @@ impl Cudnn { workspace: *mut ::libc::c_void, reserve_data: *mut ::libc::c_void, ) -> Result<(), Error> - where T: Float + DataTypeInfo { + where + T: Float + DataTypeInfo, + { API::rnn_backward_data( *self.id_c(), *(rnn_config.rnn_desc().id_c()), @@ -371,7 +372,9 @@ impl Cudnn { workspace: *mut ::libc::c_void, reserve_data: *mut ::libc::c_void, ) -> Result<(), Error> - where T: Float + DataTypeInfo { + where + T: Float + DataTypeInfo, + { API::rnn_backward_weights( *self.id_c(), *(rnn_config.rnn_desc().id_c()), diff --git a/rcudnn/cudnn/src/dropout_descriptor.rs b/rcudnn/cudnn/src/dropout_descriptor.rs index a26f6ade1..1b86842e2 100644 --- a/rcudnn/cudnn/src/dropout_descriptor.rs +++ b/rcudnn/cudnn/src/dropout_descriptor.rs @@ -49,9 +49,7 @@ impl DropoutDescriptor { /// Initializes a new CUDA cuDNN Tensor Descriptor from its C type. pub fn from_c(id: cudnnDropoutDescriptor_t) -> DropoutDescriptor { - DropoutDescriptor { - id - } + DropoutDescriptor { id } } /// Returns the CUDA cuDNN Tensor Descriptor as its C type.
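With the code threaded through, callers of the high-level `Cudnn` methods above no longer lose the raw status when they hit the catch-all arm. A hedged usage sketch — the two-field `Unknown` shape is taken from the call sites in this patch, and `describe` is a hypothetical helper, not part of the crate:

    use rcudnn::Error;

    // Hypothetical helper: render an rcudnn error while surfacing the raw
    // FFI status code that the Unknown variant now carries.
    fn describe(err: &Error) -> String {
        match err {
            Error::Unknown(msg, code) => format!("{} (raw status code: {})", msg, code),
            // Display for the remaining variants comes from their
            // #[error(...)] attributes in src/error.rs.
            other => other.to_string(),
        }
    }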
diff --git a/rcudnn/cudnn/src/lib.rs b/rcudnn/cudnn/src/lib.rs index f8fa74874..e5ecb7fb9 100644 --- a/rcudnn/cudnn/src/lib.rs +++ b/rcudnn/cudnn/src/lib.rs @@ -82,8 +82,8 @@ pub use self::error::Error; pub use self::filter_descriptor::FilterDescriptor; pub use self::normalization_descriptor::NormalizationDescriptor; pub use self::pooling_descriptor::PoolingDescriptor; -pub use self::tensor_descriptor::TensorDescriptor; pub use self::rnn_descriptor::RnnDescriptor; +pub use self::tensor_descriptor::TensorDescriptor; pub use crate::ffi::*; #[derive(Debug, Copy, Clone)] @@ -100,6 +100,6 @@ mod error; mod filter_descriptor; mod normalization_descriptor; mod pooling_descriptor; -mod tensor_descriptor; mod rnn_descriptor; +mod tensor_descriptor; pub mod utils; diff --git a/rcudnn/cudnn/src/rnn_descriptor.rs b/rcudnn/cudnn/src/rnn_descriptor.rs index cd76f87d8..fbf34c2f2 100644 --- a/rcudnn/cudnn/src/rnn_descriptor.rs +++ b/rcudnn/cudnn/src/rnn_descriptor.rs @@ -3,11 +3,11 @@ //! A Recurrent Descriptor is used to hold information about the rnn, //! which is needed for forward and backward rnnal operations. -use ffi::*; -use super::{API, Error}; -use crate::utils::DropoutConfig; +use super::{Error, API}; use crate::utils::DataType; +use crate::utils::DropoutConfig; use crate::Cudnn; +use ffi::*; /// Describes a Recurrent Descriptor. #[derive(Debug)] @@ -39,24 +39,21 @@ impl RnnDescriptor { padding_mode: cudnnRNNPaddingMode_t, ) -> Result<RnnDescriptor, Error> { let generic_rnn_desc = API::create_rnn_descriptor()?; - API::set_rnn_descriptor( - *handle.id_c(), - generic_rnn_desc, - hidden_size, - num_layers, - *dropout_config.dropout_desc().id_c(), - input_mode, - direction, - mode, - algorithm, - data_type, - )?; - - API::set_rnn_padding_mode( + API::set_rnn_descriptor( + *handle.id_c(), generic_rnn_desc, - padding_mode, + hidden_size, + num_layers, + *dropout_config.dropout_desc().id_c(), + input_mode, + direction, + mode, + algorithm, + data_type, )?; + API::set_rnn_padding_mode(generic_rnn_desc, padding_mode)?; + Ok(RnnDescriptor { id: generic_rnn_desc, dropout_config, diff --git a/rcudnn/cudnn/src/tensor_descriptor.rs b/rcudnn/cudnn/src/tensor_descriptor.rs index d3d77f618..813b2e205 100644 --- a/rcudnn/cudnn/src/tensor_descriptor.rs +++ b/rcudnn/cudnn/src/tensor_descriptor.rs @@ -16,9 +16,7 @@ pub struct TensorDescriptor { /// Return C Handle for a Vector of Tensor Descriptors pub fn tensor_vec_id_c(tensor_vec: &[TensorDescriptor]) -> Vec<cudnnTensorDescriptor_t> { - tensor_vec.iter().map(|tensor| { - *tensor.id_c() - }).collect() + tensor_vec.iter().map(|tensor| *tensor.id_c()).collect() } impl Drop for TensorDescriptor { @@ -92,4 +90,3 @@ impl TensorDescriptor { &self.id } } - diff --git a/rcudnn/cudnn/src/utils.rs b/rcudnn/cudnn/src/utils.rs index 1e1c6a0d2..7d91cb73f 100644 --- a/rcudnn/cudnn/src/utils.rs +++ b/rcudnn/cudnn/src/utils.rs @@ -2,7 +2,7 @@ use super::{ ActivationDescriptor, ConvolutionDescriptor, DropoutDescriptor, FilterDescriptor, - NormalizationDescriptor, PoolingDescriptor, RnnDescriptor + NormalizationDescriptor, PoolingDescriptor, RnnDescriptor, }; use crate::cuda::CudaDeviceMemory; @@ -275,14 +275,14 @@ impl DropoutConfig { /// * `hidden_size` Size of the hidden layer /// * `num_layers` Number of layers /// * `dropout_desc` Descriptor to a previously created & initialized dropout descriptor, applied -/// between layers. +/// between layers.
/// * `input_mode` Specifies behaviour at the input to the first layer /// * `direction_mode` Specifies the recurrence pattern - i.e bidirectional /// * `rnn_mode` Type of network used in routines ForwardInference, ForwardTraining, BackwardData, /// BackwardWeights. Can be ReLU, tanh, LSTM (Long Short Term Memory), or GRU (Gated Recurrent Unit). /// * `algo` - Only required in v6 implementation FIXME: Should this be checked in compilation? /// * `data_type` Math Precision - default f32 -/// +/// /// The LSTM network offered by CUDNN is a four-gate network that does not use peephole connections. /// Greff, et al. (2015)[1] suggests it doesn't matter what kind of network it is, although /// Jozefowicz, et al. (2015)[2] suggests that the most important gates are the forget and input, @@ -329,7 +329,7 @@ impl RnnConfig { data_type: cudnnDataType_t, workspace_size: usize, training_reserve_size: usize, - training_reserve: CudaDeviceMemory + training_reserve: CudaDeviceMemory, ) -> RnnConfig { RnnConfig { rnn_desc, @@ -344,7 +344,7 @@ impl RnnConfig { data_type, workspace_size, training_reserve_size, - training_reserve + training_reserve, } } @@ -357,10 +357,12 @@ impl RnnConfig { self.rnn_workspace_size() } /// Training Reserve Size for RNN - pub fn training_reserve_size(&self) -> usize { self.training_reserve_size } + pub fn training_reserve_size(&self) -> usize { + self.training_reserve_size + } /// Training Reserve Space on GPU for RNN pub fn training_reserve(&self) -> &CudaDeviceMemory { - &self.training_reserve + &self.training_reserve } /// Accessor function for Rnn Descriptor diff --git a/rcudnn/cudnn/tests/cudnn_specs.rs b/rcudnn/cudnn/tests/cudnn_specs.rs index 035b52495..525e37276 100644 --- a/rcudnn/cudnn/tests/cudnn_specs.rs +++ b/rcudnn/cudnn/tests/cudnn_specs.rs @@ -1,6 +1,6 @@ -extern crate rcudnn as cudnn; extern crate coaster as co; extern crate libc; +extern crate rcudnn as cudnn; extern crate rcudnn_sys as ffi; use crate::ffi::*; @@ -11,14 +11,13 @@ use crate::ffi::*; mod cudnn_spec { use crate::co::framework::IFramework; - + use crate::co::frameworks::Cuda; use crate::cudnn::cuda::CudaDeviceMemory; use crate::cudnn::utils::DataType; use crate::cudnn::utils::DropoutConfig; use crate::cudnn::{ - ActivationDescriptor, ConvolutionDescriptor, Cudnn, FilterDescriptor, - TensorDescriptor, API, + ActivationDescriptor, ConvolutionDescriptor, Cudnn, FilterDescriptor, TensorDescriptor, API, }; #[test] @@ -51,7 +50,8 @@ mod cudnn_spec { let cudnn = Cudnn::new().unwrap(); let desc = TensorDescriptor::new(&[2, 2, 2], &[4, 2, 1], DataType::Float).unwrap(); let acti = - ActivationDescriptor::new(crate::cudnnActivationMode_t::CUDNN_ACTIVATION_SIGMOID).unwrap(); + ActivationDescriptor::new(crate::cudnnActivationMode_t::CUDNN_ACTIVATION_SIGMOID) + .unwrap(); let mut a: u64 = 1; let a_ptr: *mut u64 = &mut a; diff --git a/rust-blas/src/math/mod.rs b/rust-blas/src/math/mod.rs index 6fc5ed43f..7d3310fb2 100644 --- a/rust-blas/src/math/mod.rs +++ b/rust-blas/src/math/mod.rs @@ -8,8 +8,8 @@ use std::ops::{BitXor, Deref}; pub use self::mat::Mat; -pub mod mat; pub mod bandmat; +pub mod mat; pub mod matrix; pub mod matrix_vector; pub mod vector;
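For orientation, the error-type shape that all of the rewritten call sites assume looks roughly like the sketch below. This is an abbreviated reconstruction, not the authoritative definition — that is the rewritten rcudnn/cudnn/src/error.rs in this patch — and it lists only the variants visible in the hunks above:

    use thiserror::Error;

    #[derive(Debug, Error)]
    pub enum Error {
        #[error("{0}")]
        NotInitialized(&'static str),
        #[error("{0}")]
        InvalidValue(&'static str),
        #[error("{0}")]
        BadParam(&'static str),
        #[error("{0}")]
        NotSupported(&'static str),
        #[error("{0}")]
        AllocFailed(&'static str),
        #[error("{0}")]
        ExecutionFailed(&'static str),
        // The point of this patch: Unknown must carry the raw FFI status
        // code so the original failure is never discarded.
        #[error("{0} (raw status code: {1})")]
        Unknown(&'static str, u64),
    }

Deriving `Display` and `std::error::Error` through thiserror is what lets the hand-written trait impls in the old error modules go away, which the large deletions in the error.rs files in the diffstat reflect.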