Merge pull request #74 from avik-pal/ap/tests
Add tests for utility functions
avik-pal authored Jun 29, 2022
2 parents 077d67f + 94fe305 commit 183f1c7
Showing 28 changed files with 328 additions and 202 deletions.
62 changes: 62 additions & 0 deletions .github/workflows/Downstream.yml
@@ -0,0 +1,62 @@
name: Downstream
on:
pull_request:
branches:
- main
push:
branches:
- main
concurrency:
# Skip intermediate builds: always.
# Cancel intermediate builds: only if it is a pull request build.
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
jobs:
test:
name: ${{ matrix.package.repo }}/${{ matrix.package.group }}
runs-on: ${{ matrix.os }}
env:
GROUP: ${{ matrix.package.group }}
strategy:
fail-fast: false
matrix:
julia-version: [1.7]
os: [ubuntu-latest]
package:
- { user: SciML, repo: DiffEqFlux.jl, group: BasicNeuralDE }
- { user: SciML, repo: DiffEqFlux.jl, group: AdvancedNeuralDE }
- { user: SciML, repo: DeepEquilibriumNetworks.jl, group: All }
if: contains(github.event.pull_request.labels.*.name, 'run downstream test')
steps:
- uses: actions/checkout@v2
- uses: julia-actions/setup-julia@v1
with:
version: ${{ matrix.julia-version }}
arch: x64
- uses: julia-actions/julia-buildpkg@latest
- name: Clone Downstream
uses: actions/checkout@v2
with:
repository: ${{ matrix.package.user }}/${{ matrix.package.repo }}
path: downstream
- name: Load this and run the downstream tests
shell: julia --code-coverage=user --color=yes --project=downstream {0}
run: |
using Pkg
try
# force it to use this PR's version of the package
Pkg.develop(PackageSpec(path=".")) # resolver may fail with main deps
Pkg.update()
Pkg.test() # resolver may fail with test time deps
catch err
err isa Pkg.Resolve.ResolverError || rethrow()
# If we can't resolve, this release is incompatible by SemVer and that's fine;
# it means we marked this as a breaking change, so we don't need to worry about
# mistakenly introducing a breaking change, as we have intentionally made one.
@info "Not compatible with this release. No problem." exception=err
exit(0) # Exit immediately, as a success
end
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v2
with:
files: lcov.info
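For contributors who want to reproduce this check without CI, the run block above can be mirrored in a local Julia session. This is only a sketch: it assumes a clone of the downstream package at `downstream/` (the same path the workflow uses) and that the current directory is the root of the Lux checkout.

```julia
using Pkg

# Activate the downstream package's environment (e.g. a clone of DiffEqFlux.jl at ./downstream)
Pkg.activate("downstream")

try
    # Force the downstream environment to use this checkout of Lux
    Pkg.develop(PackageSpec(path="."))
    Pkg.update()
    Pkg.test()
catch err
    # A resolver error means the downstream package caps Lux below this version,
    # i.e. the release is intentionally breaking, so treat it as a pass
    err isa Pkg.Resolve.ResolverError || rethrow()
    @info "Not compatible with this release. No problem." exception=err
end
```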
4 changes: 3 additions & 1 deletion .gitignore
@@ -6,4 +6,6 @@ wip
model_weights

docs/docs
docs/site
docs/site

scripts
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,13 @@
# v0.4

## v0.4.7

- Manual detailing the Lux interface
- Fixes a bug with ComponentArray + Optimisers
  (https://github.com/FluxML/Optimisers.jl/issues/91)
- `Dropout` layers cache `1 / (1 - p)` for a minor forward-pass speedup
- `dropout` has a custom rrule, which significantly improves performance for smaller arrays

## v0.4.6

- Documentation revamped
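For context, the Optimisers.jl fix listed above concerns the common pattern of flattening Lux parameters into a `ComponentArray` before handing them to Optimisers.jl. A minimal sketch of that pattern, assuming Zygote as the AD backend (the model, learning rate, and input sizes are arbitrary placeholders):

```julia
using Lux, Random, Optimisers, ComponentArrays, Zygote

rng = Random.default_rng()
model = Dense(2, 2)
ps, st = Lux.setup(rng, model)
ps = ComponentArray(ps)          # flat parameter vector with named axes

opt_state = Optimisers.setup(Optimisers.Adam(0.01f0), ps)

x = randn(rng, Float32, 2, 8)
# model(x, ps, st) returns (output, updated_state); differentiate a scalar of the output
grads = Zygote.gradient(p -> sum(first(model(x, p, st))), ps)[1]
opt_state, ps = Optimisers.update(opt_state, ps, grads)
```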
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "Lux"
uuid = "b2108857-7c20-44ae-9111-449ecde12c47"
authors = ["Avik Pal <[email protected]> and contributors"]
version = "0.4.7-DEV"
version = "0.4.7"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
2 changes: 1 addition & 1 deletion docs/mkdocs.yml
@@ -32,7 +32,7 @@ theme:
twitter_name: "@avikpal1410"
twitter_url: "https://twitter.com/avikpal1410"

# TODO: Setup mkdocs for showing documentation versions
# TODO(@avik-pal): Setup mkdocs for showing documentation versions
# extra:
# version:
# provider: mike
7 changes: 2 additions & 5 deletions docs/src/api/utilities.md
@@ -24,12 +24,9 @@ Lux.zeros32
Lux.applyactivation
Lux.elementwise_add
Lux.elementwise_mul
```

## RNN Utilities

```@docs
Lux.istraining
Lux.multigate
Lux.replicate
```

## Index
2 changes: 1 addition & 1 deletion docs/src/devdocs/style_guide.md
@@ -43,7 +43,7 @@ We do have automatic formatter, which opens PR after fixing common style issues,
in src should have a complementary file in the test folder, containing tests relevant to
that file's contents.

* Add generic utilities for testing in `test/utils.jl` and include them in the relevant
* Add generic utilities for testing in `test/test_utils.jl` and include them in the relevant
files.

* Use [JET.jl](https://aviatesk.github.io/JET.jl/dev/) to test for dynamic dispatch in the
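For reference, a dispatch test in that spirit might look like the sketch below. It assumes JET's `@test_opt` and `@test_call` test macros and an arbitrary small `Dense` layer; the project's actual helpers in `test/test_utils.jl` may differ.

```julia
using Lux, JET, Random, Test

rng = Random.default_rng()
model = Dense(2, 4)
ps, st = Lux.setup(rng, model)
x = randn(rng, Float32, 2, 8)

# Fail if JET finds dynamic dispatch originating from Lux in the forward pass
@test_opt target_modules=(Lux,) model(x, ps, st)
# Fail if JET finds potential runtime errors in the forward pass
@test_call model(x, ps, st)
```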
5 changes: 0 additions & 5 deletions docs/src/manual/migrate_from_flux.md
@@ -158,11 +158,6 @@ model or running inference. This is the default mode for `Flux.BatchNorm`, `Flux
do exactly what the user wants), hence our default mode is `training`. This can be changed
using [`Lux.testmode`](@ref).
### Group Normalization
`Flux.GroupNorm` sets `track_stats=true` by default. We set it to `false` since we found
little to no reference for tracking statistics in Group Normalization.
## Can't access functions like `relu`, `sigmoid`, etc?
Unlike Flux we don't reexport functionality from `NNlib`, all you need to do to fix this is
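To illustrate the default-mode behaviour discussed in this file, here is a short sketch of flipping a model's states into test mode with `Lux.testmode` (layer sizes and input shapes are arbitrary):

```julia
using Lux, Random

rng = Random.default_rng()
model = Chain(Dense(2, 4), BatchNorm(4))
ps, st = Lux.setup(rng, model)
x = randn(rng, Float32, 2, 8)

y_train, st_train = model(x, ps, st)   # states default to training mode
st_test = Lux.testmode(st)             # switch the `training` flag in the states
y_test, _ = model(x, ps, st_test)      # BatchNorm now uses its tracked statistics
```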
2 changes: 1 addition & 1 deletion lib/Boltz/src/Boltz.jl
@@ -9,7 +9,7 @@ using Statistics
using Artifacts, LazyArtifacts
using JLD2

# TODO: We want to have generic Lux implementaions for Metalhead models
# TODO(@avik-pal): We want to have generic Lux implementaions for Metalhead models
# We can automatically convert several Metalhead.jl models to Lux
using Metalhead

2 changes: 1 addition & 1 deletion src/adapt.jl
@@ -21,7 +21,7 @@ function adapt_storage(to::LuxCPUAdaptor, x::ComponentArray)
return ComponentArray(adapt_storage(to, getdata(x)), getaxes(x))
end
adapt_storage(::LuxCPUAdaptor, rng::AbstractRNG) = rng
# TODO: SparseArrays
# TODO(@avik-pal): SparseArrays
function adapt_storage(::LuxCPUAdaptor,
x::CUDA.CUSPARSE.CUDA.CUSPARSE.AbstractCuSparseMatrix)
return adapt(Array, x)
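As background, these `adapt_storage` methods back the device-transfer helpers. A sketch of the typical user-facing flow, assuming the `Lux.gpu`/`Lux.cpu` helpers from this release (with `Lux.gpu` expected to fall back to a no-op when no functional CUDA device is present):

```julia
using Lux, Random

rng = Random.default_rng()
model = Dense(2, 4)
ps, st = Lux.setup(rng, model)

ps_gpu = Lux.gpu(ps)       # moves parameters to CUDA arrays when a GPU is available
ps_cpu = Lux.cpu(ps_gpu)   # brings them back through the LuxCPUAdaptor methods above
```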
24 changes: 15 additions & 9 deletions src/autodiff.jl
@@ -2,7 +2,7 @@
ChainRulesCore.@non_differentiable replicate(::Any)
ChainRulesCore.@non_differentiable update_statistics(::Any, ::Any, ::Any, ::Any, ::Any,
::Any, ::Any)
ChainRulesCore.@non_differentiable generate_dropout_mask(::Any, ::Any, ::Any)
ChainRulesCore.@non_differentiable generate_dropout_mask(::Any, ::Any, ::Any, ::Any)
ChainRulesCore.@non_differentiable compute_adaptive_pooling_dims(::Any, ::Any)
ChainRulesCore.@non_differentiable glorot_normal(::Any...)
ChainRulesCore.@non_differentiable glorot_uniform(::Any...)
@@ -18,14 +18,9 @@ function ChainRulesCore.rrule(::typeof(Base.broadcasted), ::typeof(identity), x)
end

# NNlib Functions
function ChainRulesCore.rrule(::typeof(batchnorm),
g::CuArray{T},
b::CuArray{T},
x::Union{CuArray{T, 4}, CuArray{T, 5}},
running_mean,
running_var,
momentum;
kwargs...) where {T <: CUDNNFloat}
function ChainRulesCore.rrule(::typeof(batchnorm), g::CuArray{T}, b::CuArray{T},
x::Union{CuArray{T, 4}, CuArray{T, 5}}, running_mean,
running_var, momentum; kwargs...) where {T <: CUDNNFloat}
y = batchnorm(g, b, x, running_mean, running_var, momentum; kwargs...)
function batchnorm_pullback(dy)
dg, db, dx = ∇batchnorm(g, b, x, dy, running_mean, running_var, momentum; kwargs...)
@@ -34,6 +29,17 @@ function ChainRulesCore.rrule(::typeof(batchnorm),
return y, batchnorm_pullback
end

function ChainRulesCore.rrule(::typeof(dropout), rng::AbstractRNG, x::AbstractArray{T, N},
p::T, q::T, dims, t::Val{training}) where {T, N, training}
y, mask, rng = dropout(rng, x, p, q, dims, t)
function dropout_pullback((dy, dmask, drng))
return (NoTangent(), NoTangent(), elementwise_mul(dy, mask), NoTangent(),
NoTangent(),
NoTangent(), NoTangent())
end
return (y, mask, rng), dropout_pullback
end

# Activation Rrules
function ChainRulesCore.rrule(::typeof(applyactivation), f::cudnnValidActivationTypes,
x::CuArray{T}) where {T <: CUDNNFloat}
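For orientation, the new `dropout` rrule above is what an AD backend picks up when differentiating the functional form directly. A minimal sketch, assuming Zygote and the internal `Lux.dropout(rng, x, p, q, dims, ::Val{training})` method matching the signature above, with `q = 1 / (1 - p)`:

```julia
using Lux, Random, Zygote

rng = Random.default_rng()
x = randn(rng, Float32, 4, 3)
p = 0.5f0
q = 1 / (1 - p)   # cached inverse keep-probability, as stored by the Dropout layer

# The custom pullback returns `dy .* mask` for `x` and NoTangent() for everything else
grads = Zygote.gradient(x) do x
    y, _mask, _rng = Lux.dropout(rng, x, p, q, Colon(), Val(true))
    sum(y)
end
```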
2 changes: 1 addition & 1 deletion src/layers/basic.jl
@@ -473,7 +473,7 @@ keyword argument `disable_optimizations`.
- All sublayers are recursively optimized.
- If a function `f` is passed as a layer and it doesn't take 3 inputs, it is converted to
a WrappedFunction(`f`) which takes only one input.
a [`WrappedFunction`](@ref)(`f`) which takes only one input.
- If the layer is a Chain, it is flattened.
- [`NoOpLayer`](@ref)s are removed.
- If there is only 1 layer (left after optimizations), then it is returned without the
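To illustrate the `WrappedFunction` conversion mentioned above, a sketch of a `Chain` containing a plain one-argument function (layer sizes are arbitrary):

```julia
using Lux, Random

# The anonymous function takes a single input, so Chain wraps it in a WrappedFunction
model = Chain(Dense(2, 4), x -> 2 .* x, Dense(4, 1))

rng = Random.default_rng()
ps, st = Lux.setup(rng, model)
y, st = model(randn(rng, Float32, 2, 8), ps, st)
```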
16 changes: 8 additions & 8 deletions src/layers/dropout.jl
@@ -33,23 +33,23 @@ See also [`VariationalHiddenDropout`](@ref)
"""
struct Dropout{T, D} <: AbstractExplicitLayer
p::T
q::T
dims::D
end

function initialstates(rng::AbstractRNG, ::Dropout)
# FIXME: Take PRNGs seriously
randn(rng, 1)
randn(rng)
return (rng=replicate(rng), training=Val(true))
end

function Dropout(p; dims=:)
@assert 0 p 1
iszero(p) && return NoOpLayer()
return Dropout(p, dims)
return Dropout(p, 1 / (1 - p), dims)
end

function (d::Dropout{T})(x::AbstractArray{T}, ps, st::NamedTuple) where {T}
y, _, rng = dropout(st.rng, x, d.p, d.dims, st.training)
y, _, rng = dropout(st.rng, x, d.p, d.q, d.dims, st.training)
return y, merge(st, (rng=rng,))
end

@@ -98,24 +98,24 @@ See also [`Dropout`](@ref)
"""
struct VariationalHiddenDropout{T, D} <: AbstractExplicitLayer
p::T
q::T
dims::D
end

function initialstates(rng::AbstractRNG, ::VariationalHiddenDropout)
# FIXME: Take PRNGs seriously
randn(rng, 1)
randn(rng)
return (rng=replicate(rng), training=Val(true), update_mask=Val(true),
mask=nothing)
end

function VariationalHiddenDropout(p; dims=:)
@assert 0 p 1
iszero(p) && return NoOpLayer()
return VariationalHiddenDropout(p, dims)
return VariationalHiddenDropout(p, 1 / (1 - p), dims)
end

function (d::VariationalHiddenDropout{T})(x::AbstractArray{T}, ps, st::NamedTuple) where {T}
y, mask, rng, update_mask = dropout(st.rng, x, st.mask, d.p, d.dims, st.training,
y, mask, rng, update_mask = dropout(st.rng, x, st.mask, d.p, d.q, d.dims, st.training,
st.update_mask)
return y, merge(st, (mask=mask, rng=rng, update_mask=update_mask))
end
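The net effect of the change above is that both dropout layers now carry the precomputed scale `q = 1 / (1 - p)` alongside `p`. A usage sketch (probability and shapes are arbitrary):

```julia
using Lux, Random

rng = Random.default_rng()
d = Dropout(0.5f0)           # stores p = 0.5f0 and the cached q = 1 / (1 - p) = 2.0f0
ps, st = Lux.setup(rng, d)   # state holds a replicated rng and training = Val(true)

x = randn(rng, Float32, 4, 3)
y, st = d(x, ps, st)         # kept entries are scaled by q; the advanced rng comes back in st

Dropout(0.0) isa NoOpLayer   # true: a zero probability short-circuits to NoOpLayer
```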
25 changes: 6 additions & 19 deletions src/layers/normalize.jl
@@ -81,13 +81,8 @@ struct BatchNorm{affine, track_stats, F1, F2, F3, N} <:
init_scale::F3
end

function BatchNorm(chs::Int,
activation=identity;
init_bias=zeros32,
init_scale=ones32,
affine::Bool=true,
track_stats::Bool=true,
epsilon=1.0f-5,
function BatchNorm(chs::Int, activation=identity; init_bias=zeros32, init_scale=ones32,
affine::Bool=true, track_stats::Bool=true, epsilon=1.0f-5,
momentum=0.1f0)
activation = NNlib.fast_act(activation)
return BatchNorm{affine, track_stats, typeof(activation), typeof(init_bias),
@@ -182,7 +177,7 @@ end

"""
GroupNorm(chs::Integer, groups::Integer, activation=identity; init_bias=zeros32,
init_scale=ones32, affine=true, track_stats=false, epsilon=1f-5,
init_scale=ones32, affine=true, track_stats=true, epsilon=1f-5,
momentum=0.1f0)
[Group Normalization](https://arxiv.org/abs/1803.08494) layer.
@@ -265,14 +260,8 @@ struct GroupNorm{affine, track_stats, F1, F2, F3, N} <:
groups::Int
end

function GroupNorm(chs::Int,
groups::Int,
activation=identity;
init_bias=zeros32,
init_scale=ones32,
affine::Bool=true,
track_stats::Bool=true,
epsilon=1.0f-5,
function GroupNorm(chs::Integer, groups::Integer, activation=identity; init_bias=zeros32,
init_scale=ones32, affine=true, track_stats=true, epsilon=1.0f-5,
momentum=0.1f0)
@assert chs % groups==0 "The number of groups ($(groups)) must divide the number of channels ($chs)"
activation = NNlib.fast_act(activation)
@@ -410,9 +399,7 @@ function (wn::WeightNorm)(x, ps, s::NamedTuple)
end

@inbounds @generated function get_normalized_parameters(::WeightNorm{Val{which_params}},
dims::T,
ps::Union{ComponentArray, NamedTuple
}) where {T, which_params}
dims::T, ps) where {T, which_params}
parameter_names = string.(which_params)
v_parameter_names = Symbol.(parameter_names .* "_v")
g_parameter_names = Symbol.(parameter_names .* "_g")
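For reference, the reorganized constructors above keep the same keyword defaults; a construction sketch (channel and group counts are arbitrary):

```julia
using Lux, Random

rng = Random.default_rng()

bn = BatchNorm(4; affine=true, track_stats=true, epsilon=1.0f-5, momentum=0.1f0)
gn = GroupNorm(4, 2)   # the 2 groups must divide the 4 channels, per the @assert above

ps, st = Lux.setup(rng, bn)
x = randn(rng, Float32, 4, 8)   # channels along the second-to-last dimension
y, st = bn(x, ps, st)
```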
6 changes: 3 additions & 3 deletions src/layers/recurrent.jl
@@ -67,7 +67,7 @@ function initialparameters(rng::AbstractRNG, rnn::RNNCell{bias}) where {bias}
end

function initialstates(rng::AbstractRNG, ::RNNCell)
# FIXME: Take PRNGs seriously
# FIXME(@avik-pal): Take PRNGs seriously
randn(rng, 1)
return (rng=replicate(rng),)
end
@@ -202,7 +202,7 @@ function initialparameters(rng::AbstractRNG, lstm::LSTMCell)
end

function initialstates(rng::AbstractRNG, ::LSTMCell)
# FIXME: Take PRNGs seriously
# FIXME(@avik-pal): Take PRNGs seriously
randn(rng, 1)
return (rng=replicate(rng),)
end
@@ -310,7 +310,7 @@ function initialparameters(rng::AbstractRNG, gru::GRUCell)
end

function initialstates(rng::AbstractRNG, ::GRUCell)
# FIXME: Take PRNGs seriously
# FIXME(@avik-pal): Take PRNGs seriously
randn(rng, 1)
return (rng=replicate(rng),)
end