From 2b8e39c72dc75dd51d788ce0fcf5bd4122eaa13f Mon Sep 17 00:00:00 2001
From: Nathan Daly
Date: Sun, 26 Jan 2025 13:34:18 -0700
Subject: [PATCH] :'( Put back a generated func for unsafe_load large structs

I can't find any other variant that produces non-allocating, fast code for
structs with > 32 fields.

Since `blob[]` is a pretty important feature of the blobs package, I think we
cannot do without this change. I don't know how often we have such big structs,
but if we do, we will need this.

Too bad though!

Some alternatives I tried, but none of them worked:
```julia
@_make_new(T, ntuple(I->unsafe_load(getindex(blob, fieldname(T,I))), fieldcount(T)))
@_make_new(T, Tuple(unsafe_load(getindex(blob, i)) for i in 1:fieldcount(T)))
@_make_new(T, Tuple(map(i->unsafe_load(getindex(blob, i)), 1:fieldcount(T))))
TT = Tuple{fieldtypes(T)...}
@_make_new(T, ((unsafe_load(getindex(blob, i)) for i in 1:fieldcount(T))...,))
```
---
 src/blob.jl | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/src/blob.jl b/src/blob.jl
index ea4d73e..b4f27b4 100644
--- a/src/blob.jl
+++ b/src/blob.jl
@@ -210,19 +210,28 @@ end
 @inline function Base.unsafe_load(blob::Blob{T}) where {T}
     if isempty(fieldnames(T))
         unsafe_load(pointer(blob))
-    else
-        # This recursive definition is *almost* as fast as the `@generated` code. On julia
-        # 1.10, it has a single invoke function call here, which adds a few ns overhead.
-        # But on julia 1.11, this generates the expected code and is just as fast.
-        # We are sticking with this version though, to save the `@generated` compilation time.
+    elseif fieldcount(T) <= 32
         @_make_new(T, _unsafe_load_fields(blob, Val(fieldcount(T))))
+    else
+        _unsafe_load_many_fields(blob)
     end
 end
 @inline _unsafe_load_fields(::Blob, ::Val{0}) = ()
-function _unsafe_load_fields(blob::Blob{T}, ::Val{I}) where {T, I}
-    @inline
-    types = fieldnames(T)
-    return (_unsafe_load_fields(blob, Val(I-1))..., unsafe_load(getindex(blob, types[I])))
+@inline function _unsafe_load_fields(blob::Blob{T}, ::Val{I}) where {T, I}
+    return (_unsafe_load_fields(blob, Val(I-1))..., unsafe_load(getindex(blob, I)))
+end
+# We really want to get rid of all `@generated` functions in this codebase,
+# but we were unable to produce fast, non-allocating code for
+# `unsafe_load(::Blob{T})` where fieldcount(T) > 32. So we use this
+# fallback for the case of many fields. As far as I can tell, this
+# is not reached for <= 32 fields.
+@generated function _unsafe_load_many_fields(blob::Blob{T}) where {T}
+    quote
+        $(Expr(:meta, :inline))
+        $(Expr(:new, T, @splice I in 1:fieldcount(T) quote
+            unsafe_load(getindex(blob, $(I)))
+        end))
+    end
 end
 
 @inline function Base.unsafe_store!(blob::Blob{T}, value::T) where {T}
@@ -234,9 +243,6 @@ end
         value
     end
 end
-# On julia 1.11, this is equivalantly fast to the `@generated` version.
-# On julia 1.10, this is about 2x slower than generated for medium structs: ~10 ns vs ~5 ns.
-# We will go with the recursive version, to avoid the compilation cost.
 @inline _unsafe_store!(::Blob{T}, ::T, ::Val{0}) where {T} = nothing
 function _unsafe_store!(blob::Blob{T}, value::T, ::Val{I}) where {T, I}
     @inline