Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize load time and TTFX for non-comparison cases. #137

Merged
merged 8 commits into from
Nov 24, 2024
65 changes: 37 additions & 28 deletions src/benchmarking.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,15 @@ function benchmark(init, setup, fs::Tuple{Vararg{Any, N}}, teardown;
samples::Union{Int, Nothing}=nothing,
seconds::Union{Real, Nothing}=(samples===nothing ? DEFAULTS.seconds : 10*DEFAULTS.seconds)*N,
gc::Bool=DEFAULTS.gc) where N
_benchmark_1(init, setup, teardown, evals, samples, seconds, gc, fs...)
end
_benchmark_1(init, setup, teardown, evals::Union{Int, Nothing}, samples::Union{Int, Nothing}, seconds::Real, gc::Bool, fs...) =
_benchmark_1(init, setup, teardown, evals, samples, Float64(seconds), gc, fs...)
function _benchmark_1(init, setup, teardown, evals::Union{Int, Nothing}, samples::Union{Int, Nothing}, seconds::Union{Float64, Nothing}, gc::Bool, fs...)
@nospecialize
N = length(fs)

if seconds !== nothing && seconds >= 2.0^63*1e-9
if seconds !== nothing && seconds >= 9.223372036854776e9 # 2.0^63*1e-9
samples === nothing && throw(ArgumentError("samples must be specified if seconds is infinite or nearly infinite (more than 292 years)"))
seconds = nothing
end
Expand All @@ -36,27 +42,9 @@ function benchmark(init, setup, fs::Tuple{Vararg{Any, N}}, teardown;

args1 = maybecall(init, ())

function bench(evals, warmup=true)
p = N == 1 ? (1,) : N == 2 ? rand() < .5 ? (1,2) : (2,1) : randperm(N)
t = Ref(zero(UInt64))
args2 = maybecall(setup, args1)
rp = ntuple(N) do i
old_gc = gc || GC.enable(false)
sample, ti, args3 = try
_benchmark(fs[p[i]], args2, evals, warmup)
finally
gc || GC.enable(old_gc)
end
maybecall(teardown, (args3,))
t[] = ti
sample
end
ntuple(i -> rp[p[i]], N), t[]
end

samples == 0 && return ntuple(i -> Benchmark([bench(evals, false)[1][i]]), N)
samples == 0 && return ntuple(i -> Benchmark([_benchmark_2(args1, setup, teardown, gc, evals, false, fs...)[1][i]]), N)

warmup, start_time = bench(1, false)
warmup, start_time = _benchmark_2(args1, setup, teardown, gc, 1, false, fs...)

seconds == 0 && return ntuple(i -> Benchmark([warmup[i]]), N)
new_evals = if evals === nothing
Expand All @@ -68,7 +56,7 @@ function benchmark(init, setup, fs::Tuple{Vararg{Any, N}}, teardown;
return ntuple(i -> Benchmark([warmup[i]]), N)
end

calibration1, time = bench(1)
calibration1, time = _benchmark_2(args1, setup, teardown, gc, 1, true, fs...)

# We should be spending about 5% of runtime on calibration.
# If we spent less than 1% then recalibrate with more evals.
Expand All @@ -77,14 +65,14 @@ function benchmark(init, setup, fs::Tuple{Vararg{Any, N}}, teardown;
calibration2 = nothing
calibration2time = nothing
if calibration1time < .00015seconds # This branch protects us against cases where runtime is dominated by the reduction.
calibration2, time = bench(10)
calibration2, time = _benchmark_2(args1, setup, teardown, gc, 10, true, fs...)
calibration2time = sum(s.time for s in calibration2)
trials = floor(Int, .05seconds/(calibration2time+1e-9))
if trials > 20
calibration2, time = bench(trials)
calibration2, time = _benchmark_2(args1, setup, teardown, gc, trials, true, fs...)
end
elseif calibration1time < .01seconds
calibration2, time = bench(floor(Int, .05seconds/(calibration1time+1e-9)))
calibration2, time = _benchmark_2(args1, setup, teardown, gc, floor(Int, .05seconds/(calibration1time+1e-9)), true, fs...)
end
if calibration2 !== nothing
calibration2time = sum(s.time for s in calibration2)
Expand Down Expand Up @@ -116,22 +104,43 @@ function benchmark(init, setup, fs::Tuple{Vararg{Any, N}}, teardown;
elseif evals === nothing && calibration2 !== nothing && first(calibration2).evals == new_evals # Can't match both
data[1] = calibration2
else
data[1], time = bench(new_evals)
data[1], time = _benchmark_2(args1, setup, teardown, gc, new_evals, true, fs...)
end

i = 1
stop_time = seconds === nothing ? nothing : start_time + round(UInt64, 1e9seconds)
while (seconds === nothing || signed(stop_time - time) >= 0) && (samples === nothing || i < samples)
sample, time = bench(new_evals)
sample, time = _benchmark_2(args1, setup, teardown, gc, new_evals, true, fs...)
samples === nothing ? push!(data, sample) : (data[i += 1] = sample)
end

samples === nothing || resize!(data, i)

ntuple(i -> Benchmark([s[i] for s in data]), N)
end

# Take one sample of every function in `fs`, executing them in a random order, and
# return `(samples, t)` where `samples` is a tuple ordered like `fs` (not like the
# execution order) and `t` is the time value reported by the last function executed.
#
# Arguments:
# - `args1`: the result of the `init` stage; forwarded to `setup` through `maybecall`.
# - `setup`, `teardown`: pipeline stages invoked through `maybecall`; `setup` runs once
#   per call, `teardown` runs once per benchmarked function.
# - `gc`: when `false`, garbage collection is disabled while each function is measured
#   and the previous GC state is restored afterwards, even if the measurement throws.
# - `evals`, `warmup`: forwarded unchanged to `_benchmark_3`.
# - `fs...`: the functions to benchmark.
function _benchmark_2(args1, setup, teardown, gc::Bool, evals::Int, warmup::Bool, fs...)
    @nospecialize
    N = length(fs)
    # Execution order for this sample: `p[i]` is the index in `fs` of the i-th function run.
    p = N == 1 ? (1,) : N == 2 ? rand() < .5 ? (1,2) : (2,1) : randperm(N)
    # A `Ref` lets the `do` block below report the latest time value without reassigning
    # a captured local.
    t = Ref(zero(UInt64))
    args2 = maybecall(setup, args1)
    # `rp[i]` is the sample of `fs[p[i]]`, i.e. samples in execution order.
    rp = ntuple(N) do i
        # `GC.enable(false)` returns the previous state; when `gc` is true nothing is toggled.
        old_gc = gc || GC.enable(false)
        sample, ti, args3 = try
            _benchmark_3(fs[p[i]], args2, evals, warmup)
        finally
            gc || GC.enable(old_gc)
        end
        maybecall(teardown, (args3,))
        t[] = ti
        sample
    end
    # Undo the shuffle. The sample of `fs[j]` sits at the position `i` with `p[i] == j`,
    # which is `invperm(p)[j]`. Indexing with `p` itself is only correct when `p` is its
    # own inverse (always true for N <= 2, but not for e.g. the 3-cycle `[2, 3, 1]`).
    ip = invperm(p)
    ntuple(i -> rp[ip[i]], N), t[]
end

# Division that maps 0/0 to a zero of the quotient's type instead of NaN; every other
# input pair is divided normally.
function _div(a, b)
    if a == b && b == 0
        return zero(a / b)
    end
    return a / b
end
function _benchmark(f::F, args::A, evals::Int, warmup::Bool) where {F, A}
function _benchmark_3(f::F, args::A, evals::Int, warmup::Bool) where {F, A}
gcstats = Base.gc_num()
cumulative_compile_timing(true)
ctime, time0, time1, res = try
Expand Down
7 changes: 5 additions & 2 deletions src/precompile.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
precompile(minimum, (Benchmark,))
precompile(summarize, (Benchmark,))
precompile(process_args, (Any,))
precompile(create_function, (Symbol,))
precompile(benchmark, (Any,Any,Tuple{Any},Any))
precompile(create_first_function, (Symbol,))
precompile(create_function, (Expr,))
precompile(create_first_function, (Expr,))
precompile(_benchmark_1, (Any,Any,Any,Int,Int,Float64,Bool,Any))
precompile(Base.show, (IO, MIME"text/plain", Sample))
Loading