add PaddedDiskArray and pad #219
base: main
@@ -0,0 +1,104 @@
"""
    PaddedDiskArray <: AbstractDiskArray

    PaddedDiskArray(A, padding; fill=zero(eltype(A)))

An `AbstractDiskArray` that adds padding to the edges of the parent array.
This can be used to change chunk offsets, or to pad a larger-than-memory array
before windowing operations.

# Arguments

- `A`: The parent disk array.
- `padding`: A tuple of `Int` lower and upper padding tuples, one for each dimension.

# Keywords

- `fill=zero(eltype(A))`: The value to pad the array with.
"""
struct PaddedDiskArray{T,N,A<:AbstractArray{T,N},C,F<:T} <: AbstractDiskArray{T,N}
    parent::A
    padding::NTuple{N,Tuple{Int,Int}}
    fill::F
    chunks::C
end
function PaddedDiskArray(A::AbstractArray{T,N}, padding::NTuple{N,Tuple{Int,Int}};
    fill=zero(eltype(A)),
) where {T,N}
    chunks = GridChunks(map(_pad_offset, eachchunk(A).chunks, padding, size(A)))
    PaddedDiskArray(A, padding, fill, chunks)
end

function _pad_offset(c::RegularChunks, (low, high), s)
    chunksize = c.cs
    # Shift the chunk offset to account for the lower padding
    offset = low > 0 ? c.offset - low + chunksize : c.offset
    # The padded dimension grows by the lower and upper padding
    size = c.s + low + high
    return RegularChunks(chunksize, offset, size)
end
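
For orientation, a worked example of the regular case (a sketch, not part of the diff; the values match the reviewer's suggested tests further down):

c1 = RegularChunks(10, 0, 100)
_pad_offset(c1, (5, 2), 100)
# offset = 0 - 5 + 10 = 5 and size = 100 + 5 + 2 = 107, i.e. RegularChunks(10, 5, 107)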
function _pad_offset(c::IrregularChunks, (low, high), s)
    offsets = Vector{Int}(undef, length(c.offsets) + 1)
    # First offset is always zero
    offsets[begin] = 0
    # Increase original offsets by lower padding
    for (i, o) in enumerate(c.offsets)
        offsets[i + 1] = o + low
    end
    # Add offset for start of upper padding
    offsets[end] = s + low
    return IrregularChunks(offsets)
end

Review comment on this method: It is not really obvious to me what the code should do here. Currently this adds an extra chunk at the beginning for the padded values. Should the values padded at the end be in a new chunk as well, or should padded values simply be lumped into the first and last chunks? However, something is still wrong with the implementation, see this example:

c2 = IrregularChunks(chunksizes = [10, 10, 20, 30, 40])
_pad_offset(c2, (5, 5), 100)

which returns chunks only up to 105, while the resulting array should have length 120.
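
To make that observation concrete, a trace of the current implementation under those inputs (a sketch, not part of the diff; it assumes `c.offsets` stores the cumulative chunk boundaries starting at zero):

c2.offsets                   # [0, 10, 20, 40, 70, 110]
# the new vector has length(c2.offsets) + 1 == 7 entries
# after adding low = 5:       [0, 5, 15, 25, 45, 75, 115]
# offsets[end] = s + low:     [0, 5, 15, 25, 45, 75, 105]   (with s = 100 as passed)
# resulting chunk sizes [5, 10, 10, 20, 30, 30], ending at 105; the upper padding never gets its own boundary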

Base.parent(A::PaddedDiskArray) = A.parent
function Base.size(A::PaddedDiskArray)
    map(size(parent(A)), A.padding) do s, (low, high)
        s + low + high
    end
end

readblock!(A::PaddedDiskArray, data, I...) = _readblock_padded(A, data, I...)
readblock!(A::PaddedDiskArray, data, I::AbstractVector...) =
    _readblock_padded(A, data, I...)

Review comment on the `AbstractVector` method: Just curious why this is necessary, are the fallbacks too slow here?

writeblock!(A::PaddedDiskArray, data, I...) =
    throw(ArgumentError("Cannot write to a PaddedDiskArray"))
haschunks(A::PaddedDiskArray) = haschunks(parent(A))
eachchunk(A::PaddedDiskArray) = A.chunks

function _readblock_padded(A, data, I...)
    # Start from the fill value everywhere
    data .= A.fill
    # Shift the requested indices into parent-array coordinates
    Ipadded = map(I, A.padding) do i, (low, high)
        i .- low
    end
    # First and last positions of the request that fall inside the parent axes
    fs = map(axes(parent(A)), Ipadded) do a, ip
        searchsortedfirst(ip, first(a))
    end
    ls = map(axes(parent(A)), Ipadded) do a, ip
        searchsortedlast(ip, last(a))
    end
    return if all(map(<=, fs, ls))
        Idata = map(:, fs, ls)
        Iparent = map(getindex, Ipadded, Idata)
        @show Idata Iparent
        readblock!(parent(A), view(data, Idata...), Iparent...)
    else
        # No overlap, don't read
        data
    end
end

Review comment on the `searchsortedfirst`/`searchsortedlast` calls: This assumes indices ...
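
As a concrete trace of this index arithmetic (a sketch, not part of the diff; values taken from the first dimension of the "Overlapping lower" test case further down):

# I       = 10:20           indices requested from the padded array
# Ipadded = 0:10            shifted into parent coordinates by low = 10
# fs, ls  = 2, 11           positions of Ipadded that fall inside axes(parent(A), 1) = 1:100
# Idata   = 2:11            the part of `data` that gets overwritten
# Iparent = (0:10)[2:11]    == 1:10, the part of the parent that is read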

"""
    pad(A, padding; fill=zero(eltype(A)))

Pad any `AbstractArray` with fill values, updating chunk patterns.

# Arguments

- `A`: The parent disk array.
- `padding`: A tuple of `Int` lower and upper padding tuples, one for each dimension.

# Keywords

- `fill=zero(eltype(A))`: The value to pad the array with.
"""
pad(A::AbstractArray{<:Any,N}, padding::NTuple{N,Tuple{Int,Int}}; kw...) where N =
    PaddedDiskArray(A, padding; kw...)
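
A usage sketch (not part of the diff), using the same setup as the test below, where `ChunkedDiskArray` is the chunked wrapper from the test suite:

M = (1:100) * (1:120)'
A = cat(M, 2M, 3M, 4M; dims=3)
ch = ChunkedDiskArray(A, (128, 128, 2))
pa = DiskArrays.pad(ch, ((10, 20), (30, 40), (1, 2)); fill=999)
size(pa)  # (130, 190, 7): each dimension grows by its lower and upper padding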
@@ -962,6 +962,29 @@ end
    end
end

@testset "Padded disk arrays" begin
    M = (1:100) * (1:120)'
    A = cat(M, 2M, 3M, 4M; dims=3)
    ch = ChunkedDiskArray(A, (128, 128, 2))
    pa = DiskArrays.pad(ch, ((10, 20), (30, 40), (1, 2)); fill=999)
    @test size(pa) == (130, 190, 7)
    # All outside
    @test all(==(999), pa[1:10, 1:30, 1:1])
    # All inside
    @test pa[11:20, 31:40, 3:4] == ch[1:10, 1:10, 2:3]
    # Overlapping lower
    regions = pa[10:20, 30:40, 1:2]
    testdata = copy(regions) .= 999
    testdata[2:11, 2:11, 2:2] .= ch[1:10, 1:10, 1:1]
    @test regions == testdata
    # Overlapping upper
    regions = pa[101:120, 141:160, 5:7]
    testdata = copy(regions) .= 999
    testdata[1:10, 1:10, 1:1] .= ch[91:100, 111:120, 4:4]
    @test regions == testdata
end

Review comment on this testset: Maybe add the following tests for _pad_offset:

c1 = RegularChunks(10, 0, 100)
_pad_offset(c1, (5, 2), 100) == RegularChunks(10, 5, 107)
_pad_offset(c1, (30, 2), 100) == RegularChunks(10, 0, 132)
c2 = IrregularChunks(chunksizes = [10, 10, 20, 30, 40])
# The following test would assume padding ends up in a separate chunk:
_pad_offset(c2, (5, 5), 100) == IrregularChunks(chunksizes = [5, 10, 10, 20, 30, 40, 5])

@testset "Range subset identification" begin
    inds = [1,2,2,3,5,6,7,10,10]
    readranges, offsets = DiskArrays.find_subranges_sorted(inds,false)
Review comment on the `RegularChunks` method of `_pad_offset`: What happens if `low > chunksize`, so we pad more than a chunk? I think then we would arrive at negative offsets.
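
A quick check of that concern (a sketch, not part of the diff), reusing the reviewer's second suggested test case:

c1 = RegularChunks(10, 0, 100)
_pad_offset(c1, (30, 2), 100)
# offset = 0 - 30 + 10 = -20 under the current formula,
# while the suggested test above expects RegularChunks(10, 0, 132)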