Skip to content

Commit

Permalink
Improve perf for dynamic index / fieldname (Restore the generated fun…
Browse files Browse the repository at this point in the history
…ction perf) (#35)

* Improve perf for getindex with non-const value

* Repeat the same change for getproperty field name lookup

* Bump version 1.1.1

* fixup

* Update src/blob.jl

* Cap the tuple-building at 32-elements, due to julia tuple inference limits

* Add performance-cutoff for length-32 structs
  • Loading branch information
NHDaly authored Jan 24, 2025
1 parent 7c4e441 commit 6668537
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 13 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Blobs"
uuid = "163b9779-6631-5f90-a265-3de947924de8"
authors = []
version = "1.1.0"
version = "1.1.1"

[deps]
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
Expand Down
55 changes: 44 additions & 11 deletions src/blob.jl
Original file line number Diff line number Diff line change
Expand Up @@ -118,27 +118,60 @@ Base.@assume_effects :foldable function _recursive_sum_field_sizes(::Type{T}, ::
end

# Recursion scales better than splatting for large numbers of fields.
Base.@assume_effects :foldable @inline function blob_offset(::Type{T}, i::Int) where {T}
_recursive_sum_field_sizes(T, Val(i - 1))
# Offset of the `i`-th field of `T`, as used when building a `Blob` for that field.
#
# Two strategies are used, chosen by the number of fields in `T`:
#   * up to 32 fields: index into the precomputed `blob_offsets(T)` tuple;
#   * more than 32 fields: fall back to `_recursive_sum_field_sizes`, lifting `i - 1`
#     into the type domain with `Val`.
#
# Why the cutoff: beyond this size the tuple construction in `blob_offsets(T)` refuses to
# const-fold when `i` is a *dynamic* value, which would mean building the tuple at runtime
# with many allocations. For those larger structs a single dynamic dispatch is the
# friendlier cost.
@inline function blob_offset(::Type{T}, i::Int) where {T}
    if fieldcount(T) > 32
        return _recursive_sum_field_sizes(T, Val(i - 1))
    end
    return blob_offsets(T)[i]
end

# Tuple of offsets for all fields of `T`, one entry per field, in field order.
# Marked `:foldable` so the compiler may evaluate it at compile time when `T` is known.
Base.@assume_effects :foldable function blob_offsets(::Type{T}) where {T}
    offsets = _recursive_field_offsets(T)
    return offsets
end
# Build the tuple of field offsets for `T` by recursing over the field count in the type
# domain. Entry `k` is the running sum of `self_size` over the types of fields `1:k-1`,
# so the first field always sits at offset 0.
function _recursive_field_offsets(::Type{T}) where {T}
    return _recursive_field_offsets(T, Val(fieldcount(T)))
end

# Base cases: no fields -> empty tuple; one field -> it starts at offset 0.
_recursive_field_offsets(::Type, ::Val{0}) = ()
_recursive_field_offsets(::Type, ::Val{1}) = (0,)

# Recursive case: extend the offsets of the first `n - 1` fields with the offset of field
# `n`, which is the previous field's offset plus the previous field's size.
function _recursive_field_offsets(::Type{T}, ::Val{n}) where {T,n}
    preceding = _recursive_field_offsets(T, Val(n - 1))
    width = self_size(fieldtype(T, n - 1))
    return (preceding..., last(preceding) + width)
end


# Manually write a compile-time loop in the type domain, to enforce constant-folding the
# fieldidx even for large structs (with e.g. 100 fields). This might make compiling a touch
# slower, but it allows this to work for even large structs, like the manually-written
# fieldindexes even for large structs (with e.g. 100 fields). This might make compiling a
# touch slower, but it allows this to work for even large structs, like the manually-written
# `@generated` functions did before.
@inline function fieldidx(::Type{T}, ::Val{field}) where {T,field}
return _fieldidx_lookup(T, Val(field), Val(fieldcount(T)))
# Field-name => field-index lookup table for `T`, built from the type alone.
# Marked `:foldable` so the compiler may evaluate it at compile time when `T` is known.
Base.@assume_effects :foldable function fieldindexes(::Type{T}) where {T}
    nfields_val = Val(fieldcount(T))
    return _recursive_fieldindexes(T, nfields_val)
end
# Build a NamedTuple mapping each of the first `i` field names of `T` to its field index,
# e.g. `(re = 1, im = 2)` for `Complex{Int}` with `i == 2`.
#
# Base case: an empty NamedTuple (not an empty Tuple), so the result is a NamedTuple for
# every field count. Callers do `haskey(lookup, field::Symbol)`, which is defined for
# NamedTuple but not for Tuple; with a plain `()` a zero-field type would raise a
# MethodError there instead of reaching the intended "has no field" error.
_recursive_fieldindexes(::Type{T}, ::Val{0}) where {T} = NamedTuple()

# Recursive case: take the table for the first `i - 1` fields and append `name_i => i`.
function _recursive_fieldindexes(::Type{T}, ::Val{i}) where {T,i}
    next = _recursive_fieldindexes(T, Val(i - 1))
    # Names accumulated so far, plus the name of field `i`.
    names = (fieldnames(typeof(next))..., fieldname(T, i))
    # Splatting a NamedTuple yields its values, i.e. the indexes accumulated so far.
    return NamedTuple{names}((next..., i))
end
# Find the index of `field` in `T` by scanning field names from index `i` down to 1,
# with both the name and the current index carried in the type domain via `Val`.

# Ran out of fields without a match: `T` has no such field.
function _fieldidx_lookup(::Type{T}, ::Val{field}, ::Val{0}) where {T,field}
    return error("$T has no field $field")
end

# Check field `i`; on a mismatch continue with field `i - 1`.
function _fieldidx_lookup(::Type{T}, ::Val{field}, ::Val{i}) where {T,i,field}
    if fieldname(T, i) === field
        return i
    end
    return _fieldidx_lookup(T, Val(field), Val(i - 1))
end

# NOTE: An important optimization here is that the static operations that can be performed
# only on the type do not depend on the possibly runtime value `field`. We precompute the
# fieldname => fieldidx lookup table at compile time (as a NamedTuple), then use it at
# runtime. If the field is a known compiler constant (as in the `x.y` case), all the better.
# Field access by name: returns a `Blob{FT}` located at `blob + blob_offset(T, i)`, where
# `i` is the index of `field` within `T` and `FT` is that field's declared type.
# Calls `_throw_missing_field_error` when `T` has no field named `field`.
@inline function Base.getindex(blob::Blob{T}, field::Symbol) where {T}
# NOTE(review): `i` is assigned here and then unconditionally reassigned from
# `fieldidx_lookup[field]` below, so this value is never used. Presumably this is the
# pre-change lookup shown next to its replacement — confirm which one should remain.
i = fieldidx(T, Val(field))
# Name => index table derived from `T` alone; it does not depend on the value of `field`.
fieldidx_lookup = fieldindexes(T)
# Unknown names are reported through the @noinline error helper.
if !haskey(fieldidx_lookup, field)
_throw_missing_field_error(T, field)
end
i = fieldidx_lookup[field]
FT = fieldtype(T, i)
# Wrap the address of field `i` as a blob of the field's own type.
Blob{FT}(blob + blob_offset(T, i))
end
# Raise the "no such field" error for `T`. Kept `@noinline` so the string construction
# and throw stay out of the body of the (inlined) caller.
@noinline function _throw_missing_field_error(T, field)
    return error("$T has no field $field")
end

@noinline function _throw_getindex_boundserror(blob::Blob, i::Int)
throw(BoundsError(blob, i))
Expand Down
1 change: 0 additions & 1 deletion test/compat-tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ bbv = Blobs.malloc_and_init(BlobBitVector, 3)
pbv = @v bbv
pbv[2] = true
@test pbv[2] == true
@test pv[2] == Foo(2, 2.2)
pbv[1] = false
pbv[3] = false
# tests iteration
Expand Down

0 comments on commit 6668537

Please sign in to comment.