diff --git a/.buildkite/testing.yml b/.buildkite/testing.yml
index 9224c9256..ee0e1e6f2 100644
--- a/.buildkite/testing.yml
+++ b/.buildkite/testing.yml
@@ -64,7 +64,6 @@ steps:
- src
- ext
env:
- RETESTITEMS_NWORKERS: 0
JULIA_AMDGPU_CORE_MUST_LOAD: "1"
JULIA_AMDGPU_HIP_MUST_LOAD: "1"
JULIA_AMDGPU_DISABLE_ARTIFACTS: "1"
@@ -109,10 +108,10 @@ steps:
env:
- JULIA_PKG_PRECOMPILE_AUTO: 0
+ JULIA_AMDGPU_LOGGING_ENABLED: true
RETESTITEMS_NWORKERS: 8
RETESTITEMS_NWORKER_THREADS: 2
- RETESTITEMS_TESTITEM_TIMEOUT: 10000
+ RETESTITEMS_TESTITEM_TIMEOUT: 3600
JULIA_PKG_SERVER: ""
JULIA_NUM_THREADS: 4
SECRET_CODECOV_TOKEN: "jQ0BMTQgyZx7QGyU0Q2Ec7qB9mtE2q/tDu0FsfxvEG7/zOAGvXkyXrzIFFOQxvDoFcP+K2+hYZKMxicYdNqzr5wcxu505aNGN2GM3wyegAr+hO6q12bCFYx6qXzU9FLCCdeqINqn9gUSSOlGtWNFrbAlrTyz/D4Yo66TqBDzvaLL63FMnhCLaXW/zJt3hNuEAJaPY2O6Ze1rX2WZ3Y+i+s3uQ8aLImtoCJhPe8CRx+OhuYiTzGhynFfGntZ0738/1RN4gNM0S/hTC4gLE7XMVBanJpGh32rFaiDwW4zAyXKBrDkL3QA3MS1RvLTJxGJ085S16hCk0C4ddAhZCvIM9Q==;U2FsdGVkX1+bXdFeKMs5G79catOCyby2n07A2fg0FjVAvrjQLZ0yfvDS4paJiFikLkodho0khz2YALKb2Y0K6w=="
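Side note on the `RETESTITEMS_*` variables above: to my understanding, ReTestItems.jl reads them as defaults for the corresponding keyword arguments of `ReTestItems.runtests`, so the CI settings roughly correspond to the following local invocation (a sketch, assuming that mapping holds):

```julia
using ReTestItems

# Roughly what the env vars above configure on the CI workers:
#   RETESTITEMS_NWORKERS=8, RETESTITEMS_NWORKER_THREADS=2, RETESTITEMS_TESTITEM_TIMEOUT=3600
ReTestItems.runtests("test/"; nworkers = 8, nworker_threads = 2, testitem_timeout = 3600)
```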
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 99e93ff4a..1b5dbcb56 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -149,6 +149,8 @@ jobs:
with:
version: ${{ matrix.version }}
- uses: julia-actions/julia-downgrade-compat@v1
+ with:
+ skip: 'AMDGPU'
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
env:
diff --git a/Project.toml b/Project.toml
index a8688063a..fc063ccb0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -65,7 +65,7 @@ LuxZygoteExt = "Zygote"
[compat]
ADTypes = "0.2, 1"
-AMDGPU = "0.8.4, 0.9"
+AMDGPU = "0.8.4 - 0.9.4"
Adapt = "4"
Aqua = "0.8.4"
ArgCheck = "2.1"
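The compat change above replaces the union `"0.8.4, 0.9"` with the inclusive hyphen range `"0.8.4 - 0.9.4"`, capping AMDGPU at 0.9.4. A quick way to sanity-check what a compat entry admits (a sketch; `Pkg.Types.semver_spec` is an internal Pkg helper, used here purely for illustration):

```julia
using Pkg

spec = Pkg.Types.semver_spec("0.8.4 - 0.9.4")  # hyphen ranges are inclusive on both ends
v"0.8.4" in spec  # true
v"0.9.4" in spec  # true
v"0.9.5" in spec  # false -- the old "0.8.4, 0.9" entry would have allowed this
```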
diff --git a/README.md b/README.md
index 776691944..2205a8d1f 100644
--- a/README.md
+++ b/README.md
@@ -25,16 +25,19 @@
-The 🔥 Deep Learning Framework
+
+
Elegant & Performant Scientific Machine Learning in Julia
+ A Pure Julia Deep Learning Framework designed for Scientific Machine Learning
+
-## Installation
+## 💻 Installation
```julia
import Pkg
Pkg.add("Lux")
```
-## Getting Started
+## 🤸 Quickstart
```julia
using Lux, Random, Optimisers, Zygote
@@ -61,22 +64,18 @@ x = rand(rng, Float32, 128, 2) |> device
y, st = Lux.apply(model, x, ps, st)
# Gradients
-gs = gradient(p -> sum(Lux.apply(model, x, p, st)[1]), ps)[1]
+gs = only(gradient(p -> sum(first(Lux.apply(model, x, p, st))), ps))
# Optimization
st_opt = Optimisers.setup(Optimisers.Adam(0.0001), ps)
st_opt, ps = Optimisers.update(st_opt, ps, gs)
```
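The `only(gradient(...))` and `first(Lux.apply(...))` forms above avoid bare positional indexing. Purely as illustration (not part of this diff), here is how those same pieces might be combined into a self-contained training loop; the toy model, loss, and hyperparameters are made up for the example:

```julia
using Lux, Random, Optimisers, Zygote

rng = Random.default_rng()
model = Chain(Dense(2 => 8, tanh), Dense(8 => 1))   # hypothetical toy model
ps, st = Lux.setup(rng, model)

x = rand(rng, Float32, 2, 64)   # features × batch
y = rand(rng, Float32, 1, 64)   # made-up targets

function train(model, ps, st, x, y; epochs = 100)
    opt_state = Optimisers.setup(Optimisers.Adam(0.001f0), ps)
    for _ in 1:epochs
        # Same pattern as the README: take the first output of `Lux.apply`,
        # and unwrap the single-element gradient tuple with `only`.
        gs = only(Zygote.gradient(p -> sum(abs2, first(Lux.apply(model, x, p, st)) .- y), ps))
        opt_state, ps = Optimisers.update(opt_state, ps, gs)
    end
    return ps
end

ps = train(model, ps, st, x, y)
```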
-## Examples
+## 📚 Examples
Look in the [examples](/examples/) directory for self-contained usage examples. The [documentation](https://lux.csail.mit.edu) has examples sorted into proper categories.
-## Ecosystem
-
-Checkout our [Ecosystem](http://lux.csail.mit.edu/dev/ecosystem) page for more details.
-
-## Testing
+## 🧪 Testing
The full test suite of `Lux.jl` takes a long time; here's how to test a portion of the code.
@@ -90,7 +89,7 @@ For example, let's consider the tests for `SkipConnection`:
```julia
@testitem "SkipConnection" setup=[SharedTestSetup] tags=[:core_layers] begin
- .....
+ ...
end
```
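For readers unfamiliar with ReTestItems, a hypothetical body for such a test item might look like the sketch below (made up for illustration, not taken from the actual test suite):

```julia
@testitem "SkipConnection" setup=[SharedTestSetup] tags=[:core_layers] begin
    using Random  # illustrative only -- the real tests cover far more cases

    rng = Xoshiro(0)
    layer = SkipConnection(Dense(2 => 2), +)
    ps, st = Lux.setup(rng, layer)
    x = rand(rng, Float32, 2, 4)
    y, _ = layer(x, ps, st)
    @test size(y) == (2, 4)
end
```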
@@ -118,7 +117,7 @@ use [TestEnv.jl](https://github.com/JuliaTesting/TestEnv.jl) as follows. Start w
using TestEnv; TestEnv.activate(); using ReTestItems;
# Assuming you are in the main directory of Lux
-ReTestItems.runtests("tests/"; name = )
+ReTestItems.runtests("tests/"; name = "NAME OF THE TEST")
```
For the `SkipConnection` tests that would be:
@@ -127,11 +126,11 @@ For the `SkipConnection` tests that would be:
ReTestItems.runtests("tests/"; name = "SkipConnection")
```
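To my understanding (not stated in this diff), `name` also accepts a `Regex`, which is handy for running a family of related test items:

```julia
# Assumed ReTestItems feature: filter test items by a regular expression on their name.
ReTestItems.runtests("tests/"; name = r"SkipConnection")
```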
-## Getting Help
+## 🆘 Getting Help
For usage-related questions, please use [GitHub Discussions](https://github.com/LuxDL/Lux.jl/discussions) or the [JuliaLang Discourse (machine learning domain)](https://discourse.julialang.org/c/domain/ml/), which allow questions and answers to be indexed. To report bugs, use [GitHub issues](https://github.com/LuxDL/Lux.jl/issues) or, even better, send in a [pull request](https://github.com/LuxDL/Lux.jl/pulls).
-## Citation
+## 🧑‍🔬 Citation
If you found this library to be useful in academic work, then please cite:
diff --git a/docs/src/api/Lux/autodiff.md b/docs/src/api/Lux/autodiff.md
index 7f97aeeae..39e3c1e3e 100644
--- a/docs/src/api/Lux/autodiff.md
+++ b/docs/src/api/Lux/autodiff.md
@@ -1,4 +1,4 @@
-# Automatic Differentiation
+# [Automatic Differentiation](@id autodiff-lux)
Lux is not an AD package, but it composes well with most of the AD packages available in the
Julia ecosystem. This document lists the current level of support for various AD packages in
diff --git a/docs/src/introduction/index.md b/docs/src/introduction/index.md
index 7d3844ad5..46a35f283 100644
--- a/docs/src/introduction/index.md
+++ b/docs/src/introduction/index.md
@@ -4,16 +4,11 @@
Install [Julia v1.10 or above](https://julialang.org/downloads/). Lux.jl is available
through the Julia package manager. You can enter it by pressing `]` in the REPL and then
-typing
+typing `add Lux`. Alternatively, you can install it with
```julia
-pkg> add Lux
-```
-
-Alternatively, you can also do
-
-```julia
-import Pkg; Pkg.add("Lux")
+import Pkg
+Pkg.add("Lux")
```
## Quickstart
diff --git a/docs/src/introduction/overview.md b/docs/src/introduction/overview.md
index 1ae5b32e9..3405964a7 100644
--- a/docs/src/introduction/overview.md
+++ b/docs/src/introduction/overview.md
@@ -28,20 +28,24 @@ it both compiler and autodiff friendly.
[edge cases and limitations](https://fluxml.ai/Optimisers.jl/dev/api/#Optimisers.destructure). Lux
forces users to make an explicit distinction between state variables and parameter
variables to avoid these issues. Also, it comes batteries-included for distributed training.
-
+
* **Sensible display of Custom Layers** -- Ever wanted to see PyTorch-like network printouts
or wondered how to extend the pretty printing of Flux's layers? Lux handles all of that
by default.
-
+
* **Truly immutable models** - No *unexpected internal mutations* since all layers are
- implemented as pure functions. All layers are also *deterministic* given the parameters and
- state: if a layer is supposed to be stochastic (say `Dropout`), the state must contain a
- seed which is then updated after the function call.
+ implemented as pure functions. All layers are also *deterministic* given the parameters
+ and state: if a layer is supposed to be stochastic (say [`Dropout`](@ref)), the state
+ must contain a seed which is then updated after the function call.
* **Easy Parameter Manipulation** -- By separating parameter data and layer structures,
- Lux makes implementing `WeightNorm`, `SpectralNorm`, etc. downright trivial.
- Without this separation, it is much harder to pass such parameters
- around without mutations which AD systems don't like.
+ Lux makes implementing [`WeightNorm`](@ref), `SpectralNorm`, etc. downright trivial.
+ Without this separation, it is much harder to pass such parameters around without
+ mutations which AD systems don't like.
+
+* **Wider AD Support** -- Lux has extensive support for most
+ [AD systems in Julia](@ref autodiff-lux), while Flux is mostly tied to Zygote (with some
+ initial support for Enzyme).
* **Small Neural Networks on CPU** -- Lux is developed for training large neural networks.
For smaller architectures, we recommend using
@@ -58,3 +62,4 @@ it both compiler and autodiff friendly.
For these, python frameworks like PyTorch and Jax are better suited.
* **XLA Support** -- Lux doesn't compile to XLA, which means no TPU support, unfortunately.
+ We are actively working on XLA support via [Reactant.jl](https://github.com/EnzymeAD/Reactant.jl).
diff --git a/test/runtests.jl b/test/runtests.jl
index 7769c67cb..0c4272544 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -16,7 +16,7 @@ end
const EXTRA_PKGS = String[]
if ("all" in LUX_TEST_GROUP || "distributed" in LUX_TEST_GROUP)
- push!(EXTRA_PKGS, "MPI")
+ BACKEND_GROUP != "amdgpu" && push!(EXTRA_PKGS, "MPI")
(BACKEND_GROUP == "all" || BACKEND_GROUP == "cuda") && push!(EXTRA_PKGS, "NCCL")
end
("all" in LUX_TEST_GROUP || "others" in LUX_TEST_GROUP) && push!(EXTRA_PKGS, "Flux")
@@ -41,7 +41,7 @@ for tag in LUX_TEST_GROUP
end
# Distributed Tests
-if "all" in LUX_TEST_GROUP || "distributed" in LUX_TEST_GROUP
+if ("all" in LUX_TEST_GROUP || "distributed" in LUX_TEST_GROUP) && BACKEND_GROUP != "amdgpu"
using MPI
nprocs_str = get(ENV, "JULIA_MPI_TEST_NPROCS", "")
diff --git a/test/setup_modes.jl b/test/setup_modes.jl
index 956979ab5..bc6e90eb9 100644
--- a/test/setup_modes.jl
+++ b/test/setup_modes.jl
@@ -1,6 +1,4 @@
-using Lux, LuxDeviceUtils, GPUArraysCore, Pkg
-
-GPUArraysCore.allowscalar(false)
+using Lux, LuxDeviceUtils
if !@isdefined(BACKEND_GROUP)
const BACKEND_GROUP = lowercase(get(ENV, "BACKEND_GROUP", "all"))