From ca855db4551b17a615f817bfc84039b75ab73b52 Mon Sep 17 00:00:00 2001 From: Carsten Bauer Date: Tue, 4 Oct 2022 11:54:39 +0200 Subject: [PATCH 1/4] first try --- examples/diffusion3D_novis_noperf.jl | 9 +++++++-- src/ParallelKernel/allocators.jl | 26 +++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/examples/diffusion3D_novis_noperf.jl b/examples/diffusion3D_novis_noperf.jl index 2a161b1a..c2797dcb 100644 --- a/examples/diffusion3D_novis_noperf.jl +++ b/examples/diffusion3D_novis_noperf.jl @@ -1,10 +1,14 @@ -const USE_GPU = true +using ThreadPinning +pinthreads(:spread) +const USE_GPU = false using ParallelStencil using ParallelStencil.FiniteDifferences3D @static if USE_GPU @init_parallel_stencil(CUDA, Float64, 3); else @init_parallel_stencil(Threads, Float64, 3); + @show Threads.nthreads() + @show ThreadPinning.getcpuids() end @parallel function diffusion3D_step!(T2, T, Ci, lam, dt, dx, dy, dz) @@ -46,4 +50,5 @@ end end -diffusion3D() +@time diffusion3D(); +@time diffusion3D(); diff --git a/src/ParallelKernel/allocators.jl b/src/ParallelKernel/allocators.jl index 033b9d04..5941a5c5 100644 --- a/src/ParallelKernel/allocators.jl +++ b/src/ParallelKernel/allocators.jl @@ -40,14 +40,38 @@ macro rand_threads(args...) check_initialized(); esc(_rand(args...; package=PKG ## ALLOCATOR FUNCTIONS +# function _zeros(args...; package::Symbol=get_package()) +# numbertype = get_numbertype() +# if (package == PKG_CUDA) return :(CUDA.zeros($numbertype, $(args...))) +# elseif (package == PKG_THREADS) return :(Base.zeros($numbertype, $(args...))) +# else @KeywordArgumentError("$ERRMSG_UNSUPPORTED_PACKAGE (obtained: $package).") +# end +# end function _zeros(args...; package::Symbol=get_package()) numbertype = get_numbertype() if (package == PKG_CUDA) return :(CUDA.zeros($numbertype, $(args...))) - elseif (package == PKG_THREADS) return :(Base.zeros($numbertype, $(args...))) + elseif (package == PKG_THREADS) + return quote + arr = Array{$numbertype}(undef, $(args...)) + Threads.@threads for i in eachindex(arr) + @inbounds arr[i] = zero($numbertype) + end + arr + end else @KeywordArgumentError("$ERRMSG_UNSUPPORTED_PACKAGE (obtained: $package).") end end +# macro asd(args...) +# quote +# tmp = Array{$numbertype}(undef, $(args...)) +# Threads.@threads for i in eachindex(tmp) +# @inbounds tmp[i] = zero($numbertype) +# end +# tmp +# end +# end + function _ones(args...; package::Symbol=get_package()) numbertype = get_numbertype() if (package == PKG_CUDA) return :(CUDA.ones($numbertype, $(args...))) From da12413b70e0dd5a6bef414a76b0c2513f1e4ea2 Mon Sep 17 00:00:00 2001 From: Carsten Bauer Date: Tue, 4 Oct 2022 13:30:46 +0200 Subject: [PATCH 2/4] kinda works --- src/ParallelKernel/allocators.jl | 35 +++++++++----------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/src/ParallelKernel/allocators.jl b/src/ParallelKernel/allocators.jl index 5941a5c5..442cf182 100644 --- a/src/ParallelKernel/allocators.jl +++ b/src/ParallelKernel/allocators.jl @@ -40,38 +40,14 @@ macro rand_threads(args...) check_initialized(); esc(_rand(args...; package=PKG ## ALLOCATOR FUNCTIONS -# function _zeros(args...; package::Symbol=get_package()) -# numbertype = get_numbertype() -# if (package == PKG_CUDA) return :(CUDA.zeros($numbertype, $(args...))) -# elseif (package == PKG_THREADS) return :(Base.zeros($numbertype, $(args...))) -# else @KeywordArgumentError("$ERRMSG_UNSUPPORTED_PACKAGE (obtained: $package).") -# end -# end function _zeros(args...; package::Symbol=get_package()) numbertype = get_numbertype() if (package == PKG_CUDA) return :(CUDA.zeros($numbertype, $(args...))) - elseif (package == PKG_THREADS) - return quote - arr = Array{$numbertype}(undef, $(args...)) - Threads.@threads for i in eachindex(arr) - @inbounds arr[i] = zero($numbertype) - end - arr - end + elseif (package == PKG_THREADS) return :(ParallelStencil.ParallelKernel._parallel_init(Base.zero, $numbertype, $(args...))) else @KeywordArgumentError("$ERRMSG_UNSUPPORTED_PACKAGE (obtained: $package).") end end -# macro asd(args...) -# quote -# tmp = Array{$numbertype}(undef, $(args...)) -# Threads.@threads for i in eachindex(tmp) -# @inbounds tmp[i] = zero($numbertype) -# end -# tmp -# end -# end - function _ones(args...; package::Symbol=get_package()) numbertype = get_numbertype() if (package == PKG_CUDA) return :(CUDA.ones($numbertype, $(args...))) @@ -87,3 +63,12 @@ function _rand(args...; package::Symbol=get_package()) else @KeywordArgumentError("$ERRMSG_UNSUPPORTED_PACKAGE (obtained: $package).") end end + +function _parallel_init(f::F, numbertype::D, args...) where {F, D<:DataType} + arr = Array{numbertype, length(args)}(undef, args...) + Threads.@threads :static for i in eachindex(arr) + # @inbounds arr[i] = f(numbertype) + @inbounds arr[i] = f(Float64) + end + return arr +end From 04c2b11f5844a9ef64e0a4d367b368468bb9589d Mon Sep 17 00:00:00 2001 From: Carsten Bauer Date: Tue, 4 Oct 2022 15:36:14 +0200 Subject: [PATCH 3/4] works --- src/ParallelKernel/allocators.jl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/ParallelKernel/allocators.jl b/src/ParallelKernel/allocators.jl index 442cf182..a07ab3a6 100644 --- a/src/ParallelKernel/allocators.jl +++ b/src/ParallelKernel/allocators.jl @@ -51,7 +51,7 @@ end function _ones(args...; package::Symbol=get_package()) numbertype = get_numbertype() if (package == PKG_CUDA) return :(CUDA.ones($numbertype, $(args...))) - elseif (package == PKG_THREADS) return :(Base.ones($numbertype, $(args...))) + elseif (package == PKG_THREADS) return :(ParallelStencil.ParallelKernel._parallel_init(Base.one, $numbertype, $(args...))) else @KeywordArgumentError("$ERRMSG_UNSUPPORTED_PACKAGE (obtained: $package).") end end @@ -59,16 +59,15 @@ end function _rand(args...; package::Symbol=get_package()) numbertype = get_numbertype() if (package == PKG_CUDA) return :(CUDA.CuArray(rand($numbertype, $(args...)))) - elseif (package == PKG_THREADS) return :(Base.rand($numbertype, $(args...))) + elseif (package == PKG_THREADS) return :(ParallelStencil.ParallelKernel._parallel_init(Base.rand, $numbertype, $(args...))) else @KeywordArgumentError("$ERRMSG_UNSUPPORTED_PACKAGE (obtained: $package).") end end -function _parallel_init(f::F, numbertype::D, args...) where {F, D<:DataType} +function _parallel_init(f::F, numbertype::Type{T}, args...) where {F, T} arr = Array{numbertype, length(args)}(undef, args...) Threads.@threads :static for i in eachindex(arr) - # @inbounds arr[i] = f(numbertype) - @inbounds arr[i] = f(Float64) + @inbounds arr[i] = f(T) end return arr end From 023445182cc1e8b2afb94f9e0d654d9a3abe500f Mon Sep 17 00:00:00 2001 From: Carsten Bauer Date: Tue, 4 Oct 2022 16:34:12 +0200 Subject: [PATCH 4/4] undo --- examples/diffusion3D_novis_noperf.jl | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/examples/diffusion3D_novis_noperf.jl b/examples/diffusion3D_novis_noperf.jl index c2797dcb..2a161b1a 100644 --- a/examples/diffusion3D_novis_noperf.jl +++ b/examples/diffusion3D_novis_noperf.jl @@ -1,14 +1,10 @@ -using ThreadPinning -pinthreads(:spread) -const USE_GPU = false +const USE_GPU = true using ParallelStencil using ParallelStencil.FiniteDifferences3D @static if USE_GPU @init_parallel_stencil(CUDA, Float64, 3); else @init_parallel_stencil(Threads, Float64, 3); - @show Threads.nthreads() - @show ThreadPinning.getcpuids() end @parallel function diffusion3D_step!(T2, T, Ci, lam, dt, dx, dy, dz) @@ -50,5 +46,4 @@ end end -@time diffusion3D(); -@time diffusion3D(); +diffusion3D()