Skip to content

Commit

Permalink
Make conversion from custom serialization work even when original typ…
Browse files Browse the repository at this point in the history
…e cannot be understood. (#468)

* Initial attempt to overload readas

* Add example cases

* Fix test setup errors

* Use readas

* Add docstring and remove temp files

* Add CHANGELOG entry

* Improve docstring

* Add tests for readas

* Bump version
  • Loading branch information
KnutAM authored Jul 10, 2023
1 parent b41d604 commit 32aae76
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 11 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## 0.4.32
- add experimental `JLD2.readas` function for customized reading of custom serialized objects (#468)

## 0.4.31
- fix UInt32 truncation error for absurdly large array sizes
- move test-files to a separate repo
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "JLD2"
uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
version = "0.4.31"
version = "0.4.32"

[deps]
FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
Expand Down
1 change: 0 additions & 1 deletion src/data/custom_serialization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# Default behaviour: a type is written as itself.
function writeas(T::Type)
    return T
end


# wconvert converts a value to the type it is serialized as before it is
# written; rconvert is its counterpart on the reading side.
# Both fall back to convert.
function wconvert(T, x)
    return convert(T, x)
end
Expand Down
59 changes: 51 additions & 8 deletions src/data/reconstructing_datatypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,49 @@ function check_empty(attrs::Vector{ReadAttribute})
false
end

"""
    readas(::Type)::Type

**Experimental feature**:
Overload `JLD2.readas` to choose the type that a stored (custom serialized) type
is read as. It is meant to be used together with custom serialization through
[`JLD2.writeas`](@ref).

The typical use case is custom serialization of parametric types for which not
all type parameters are available when the file is read. As an example, take a
struct `Foo` that holds an anonymous function `fun`:

```julia
struct Foo{F<:Function}
    fun::F
end
struct FooSerialization
    fun
end
JLD2.writeas(::Type{<:Foo}) = FooSerialization
Base.convert(::Type{<:FooSerialization}, f::Foo) = FooSerialization(f.fun)

JLD2.readas(::Type{<:FooSerialization}) = Foo
struct UndefinedFunction <: Function
    fun
end
(f::UndefinedFunction)(args...; kwargs...) = error("The function \$(f.fun) is not defined")
function Base.convert(::Type{<:Foo}, f::FooSerialization)
    isa(f.fun, Function) && return Foo(f.fun)
    return Foo(UndefinedFunction(f.fun))
end
```

Include these definitions, call `jldsave("foo.jld2"; foo=Foo(x->x^2))`, restart
julia, include the definitions again, and call
`foo = jldopen("foo.jld2") do io; io["foo"]; end`.
With the `JLD2.readas` overload above the result is `foo::Foo{UndefinedFunction}`;
without it the result is `foo::FooSerialization`.
"""
function readas(::Any)
    # `nothing` signals that no overload was specified for this type.
    return nothing
end

# Pick the type to read as: the `readas` overload for `T_custom` when one
# exists, otherwise the fallback `T_in`.
function _readas(T_custom, T_in)
    override = readas(T_custom)::Union{Type,Nothing}
    # `nothing` means no overload is specified, so keep the incoming type.
    override === nothing && return T_in
    return override
end

# jltype is the inverse of h5type, providing a ReadRepresentation for an
# H5Datatype. We handle committed datatypes here, and other datatypes below.
function jltype(f::JLDFile, cdt::CommittedDatatype)
Expand Down Expand Up @@ -56,18 +99,18 @@ function jltype(f::JLDFile, cdt::CommittedDatatype)
datatype = read_attr_data(f, julia_type_attr)
if written_type_attr !== nothing
# Custom serialization
readas = datatype
datatype = read_attr_data(f, written_type_attr)
if isa(readas, UnknownType)
@warn("custom serialization of $(typestring(readas))" *
custom_datatype = read_attr_data(f, written_type_attr)
read_as = _readas(custom_datatype, datatype)
if isa(read_as, UnknownType)
@warn("custom serialization of $(typestring(read_as))" *
" encountered, but the type does not exist in the workspace; the data will be read unconverted")
rr = (constructrr(f, datatype, dt, attrs)::Tuple{ReadRepresentation,Bool})[1]
rr = (constructrr(f, custom_datatype, dt, attrs)::Tuple{ReadRepresentation,Bool})[1]
canonical = false
else
rr, canonical = constructrr(f, datatype, dt, attrs)::Tuple{ReadRepresentation,Bool}
rr, canonical = constructrr(f, custom_datatype, dt, attrs)::Tuple{ReadRepresentation,Bool}
rrty = typeof(rr)
rr = ReadRepresentation{readas, CustomSerialization{rrty.parameters[1], rrty.parameters[2]}}()
canonical = canonical && writeas(readas) === datatype
rr = ReadRepresentation{read_as, CustomSerialization{rrty.parameters[1], rrty.parameters[2]}}()
canonical = canonical && writeas(read_as) === custom_datatype
end
else
rr, canonical = constructrr(f, datatype, dt, attrs)::Tuple{ReadRepresentation,Bool}
Expand Down
64 changes: 64 additions & 0 deletions test/readas.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Round-trip test for `JLD2.readas`. Data is saved by one julia process and read
# back by a second, fresh process, so that `fun` (defined only in the saving
# script) does not exist when the files are read.
@testset "readas" begin
# Definitions shared by both scripts; this string is prepended to each of them.
definitions = """
using JLD2, Test
struct UndefinedFunction <:Function
fun
end
(f::UndefinedFunction)(args...; kwargs...) = error("The function \$(f.fun) is not defined")
# Case when readas is defined
# If `F` doesn't exist when reading, should be read as `Foo{UndefinedFunction}`
struct Foo{F<:Function}
fun::F
end
struct FooSerialization
fun
end
JLD2.writeas(::Type{<:Foo}) = FooSerialization
Base.convert(::Type{<:FooSerialization}, f::Foo) = FooSerialization(f.fun)
JLD2.readas(::Type{<:FooSerialization}) = Foo
function Base.convert(::Type{<:Foo}, f::FooSerialization)
isa(f.fun, Function) && return Foo(f.fun)
return Foo(UndefinedFunction(f.fun))
end
# Case when readas is not defined (N)
# If `F` doesn't exist when reading, should be read as `FooNSerialization`
struct FooN{F<:Function}
fun::F
end
struct FooNSerialization
fun
end
JLD2.writeas(::Type{<:FooN}) = FooNSerialization
Base.convert(::Type{<:FooNSerialization}, f::FooN) = FooNSerialization(f.fun)
Base.convert(::Type{<:FooN}, f::FooNSerialization) = FooN(f.fun)
"""
# Saving script: stores `Foo` and `FooN` wrapping both `sin` and the
# script-local function `fun`.
save_files = definitions*"""
jldsave("readas_foo_sin.jld2"; foo=Foo(sin))
fun(x) = x^2
jldsave("readas_foo_a.jld2"; foo=Foo(fun))
jldsave("readas_foo_n_sin.jld2"; foo=FooN(sin))
jldsave("readas_foo_n_a.jld2"; foo=FooN(fun))
"""

# Reading script: `fun` is not defined here. It checks the type each file is
# read as and then deletes the files.
read_files = definitions*"""
getfoo(file) = jldopen(file) do io; io["foo"]; end
@test getfoo("readas_foo_sin.jld2") isa Foo{typeof(sin)}
@test getfoo("readas_foo_n_sin.jld2") isa FooN{typeof(sin)}
@test getfoo("readas_foo_a.jld2") isa Foo{UndefinedFunction}
@test getfoo("readas_foo_n_a.jld2") isa FooNSerialization
rm("readas_foo_sin.jld2")
rm("readas_foo_n_sin.jld2")
rm("readas_foo_a.jld2")
rm("readas_foo_n_a.jld2")
"""
# `pwd()` is interpolated here, i.e. before the `cd` below, so `--project`
# refers to the directory the tests were started from.
save_cmd = `$(Base.julia_cmd()) --project=$(pwd()) -e $(save_files)`
read_cmd = `$(Base.julia_cmd()) --project=$(pwd()) -e $(read_files)`
# Run both scripts from inside a new temporary directory created under the
# current directory, so the relative file names above resolve there.
# NOTE(review): `better_success` is defined elsewhere in the test suite;
# presumably it runs the command and returns whether it succeeded — confirm.
cd(mktempdir(pwd())) do
@test better_success(save_cmd)
@test better_success(read_cmd)
end
end
3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,5 @@ include("inlineunion.jl")
include("customserialization.jl")
include("compression.jl")
include("test_files.jl")
include("unpack_test.jl")
include("unpack_test.jl")
include("readas.jl")

2 comments on commit 32aae76

@JonasIsensee
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/87163

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.4.32 -m "<description of version>" 32aae76cc7ad4c1a5f14e68a6abb0d82a68ae079
git push origin v0.4.32

Please sign in to comment.