diff --git a/scripts/choose_p.jl b/scripts/choose_p.jl
index 864e643..17be4c8 100644
--- a/scripts/choose_p.jl
+++ b/scripts/choose_p.jl
@@ -30,7 +30,7 @@ function generate_multipole(center, ρs, θs, ϕs, qs, expansion_order)
     system = build_system(center, ρs, θs, ϕs, qs)
 
     # build branch
-    branch = Branch(1:1, 0, 1:0, 0, 1, center, 0.0, 0.0, SVector{6}(0.0,0,0,0,0,0), SVector{3}(0.0,0,0), expansion_order)
+    branch = Branch(1:length(ρs), 0, 1:0, 0, 1, center, 0.0, 0.0, SVector{6}(0.0,0,0,0,0,0), SVector{3}(0.0,0,0), expansion_order)
 
     # multipole coefficients
     body_to_multipole!(branch, system, branch.harmonics, Val(expansion_order))
@@ -120,19 +120,43 @@ function solve_p_new(ε, ρ, am, al)
     return 51
 end
 
+function solve_p_new_multipole(ε, ρ, am, al)
+    target = ε * (ρ-am-al)
+    f1 = am / (ρ-al)
+    t1 = 3 * am
+    for p in 0:50
+        this_target = target * (2*p+8)# * sqrt(4*p+6)
+        t1 < this_target && (return p)
+        t1 *= f1
+    end
+    return 51
+end
+
 function solve_p_old(ε, ρ, am)
     c = ρ/am - 1.0
     p_old = Int(ceil(-log(c,ε)))
     return p_old
 end
 
-function choose_p(epsilon::Number, dx=0.0, pmax=51)
+function solve_p_old_multipole(ε, ρ, am, al)
+    target = ε * (ρ - am - al)
+    f2 = am / (ρ-al)
+    t2 = am
+    for p in 0:50
+        t2 < target && (return p)
+        t2 *= f2
+    end
+    return 51
+end
+
+function choose_p(epsilon::Number, dx=0.0, pmax=51; multipole_only=false,
+        ρs = [1.0],
+        θs = [π/2],
+        ϕs = [0.0],
+        qs = [1.0],
+    )
     # define multipole (source)
     multipole_center = SVector{3}(0.0,0,0)
-    ρs = [1.0]
-    θs = [π/2]
-    ϕs = [0.0]
-    qs = [1.0]
     multipole, system = generate_multipole(multipole_center, ρs, θs, ϕs, qs, pmax)
 
     # define target
@@ -144,12 +168,20 @@ function choose_p(epsilon::Number, dx=0.0, pmax=51)
 
     # determine expansion order
     ρ = norm(local_center - multipole.center)
+    al = 1.0 - dx
     am = ρs[1]
-    p_old = solve_p_old(epsilon, ρ, am)
+    if multipole_only
+        p_old = solve_p_old_multipole(epsilon, ρ, am, al)
+    else
+        p_old = solve_p_old(epsilon, ρ, am)
+    end
 
     # use new error function
-    al = 1.0 - dx
-    p_new = solve_p_new(epsilon, ρ, am, al)
+    if multipole_only
+        p_new = solve_p_new_multipole(epsilon, ρ, am, al)
+    else
+        p_new = solve_p_new(epsilon, ρ, am, al)
+    end
 
     # potential
     u_multipole = evaluate_multipole(xt, multipole, p_old)
@@ -163,7 +195,12 @@ function choose_p(epsilon::Number, dx=0.0, pmax=51)
     return abs((u_multipole - u_direct)/u_direct), abs((u_local - u_direct)/u_direct), p_old, abs((u_multipole_new - u_direct)/u_direct), abs((u_local_new - u_direct)/u_direct), p_new
 end
 
-function choose_p(εs::Vector, dx=0.0, pmax=50)
+function choose_p(εs::Vector, dx=0.0, pmax=51; multipole_only=false,
+        ρs = [1.0],
+        θs = [π/2],
+        ϕs = [0.0],
+        qs = [1.0],
+    )
     multipole_error = zeros(length(εs))
     local_error = zeros(length(εs))
     new_multipole_error = zeros(length(εs))
@@ -171,7 +208,7 @@ function choose_p(εs::Vector, dx=0.0, pmax=50)
     old_ps = zeros(Int,length(εs))
     new_ps = zeros(Int,length(εs))
     for (i,ε) in enumerate(εs)
-        m, l, p_old, m_new, l_new, p_new = choose_p(ε, dx, pmax)
+        m, l, p_old, m_new, l_new, p_new = choose_p(ε, dx, pmax; multipole_only, ρs, θs, ϕs, qs)
         multipole_error[i] = m
         local_error[i] = l
         old_ps[i] = p_old
@@ -182,6 +219,48 @@ function choose_p(εs::Vector, dx=0.0, pmax=50)
     return multipole_error, local_error, old_ps, new_multipole_error, new_local_error, new_ps
 end
 
+function uniform_sphere(nx, A)
+    ρs = Float64[]
+    θs = Float64[]
+    ϕs = Float64[]
+    for x in range(-1, stop=1.0, length= nx)
+        for y in range(-1, stop=1.0, length=nx)
+            for z in range(-1, stop=1.0, length=nx)
+                ρ = x*x+y*y+z*z
+                if ρ <= 1.0
+                    r, θ, ϕ = FastMultipole.cartesian_to_spherical(SVector{3}(x,y,z))
+                    push!(ρs, r)
+                    push!(θs, θ)
+                    push!(ϕs, ϕ)
+                end
+            end
+        end
+    end
+    qs = zeros(length(ρs))
+    qs .= A / length(qs)
+    return ρs, θs, ϕs, qs
+end
+
+function uniform_box(nx, A)
+    ρs = Float64[]
+    θs = Float64[]
+    ϕs = Float64[]
+    for x in range(-1, stop=1.0, length= nx)
+        for y in range(-1, stop=1.0, length=nx)
+            for z in range(-1, stop=1.0, length=nx)
+                r, θ, ϕ = FastMultipole.cartesian_to_spherical(SVector{3}(x,y,z))
+                push!(ρs, r)
+                push!(θs, θ)
+                push!(ϕs, ϕ)
+            end
+        end
+    end
+    qs = zeros(length(ρs))
+    qs .= A / length(qs)
+    return ρs, θs, ϕs, qs
+end
+
+#=
 fig = figure("choose p")
 fig.clear()
 fig.add_subplot(211, xlabel="relative error tolerance", ylabel="resulting error")
@@ -211,3 +290,38 @@ tight_layout()
 
 BSON.@save "choose_p2_d$distance.bson" m l old_ps m_new l_new new_ps
 savefig("choose_p2_d$distance.png")
+=#
+
+fig = figure("choose p multipole")
+fig.clear()
+fig.add_subplot(121, xlabel="relative error tolerance", ylabel="multipole error")
+fig.add_subplot(122, xlabel="relative error tolerance", ylabel="expansion order")
+ax1 = fig.get_axes()[0]
+ax0 = fig.get_axes()[1]
+
+distance = -0.25
+eps = [10.0^x for x in -12:0.005:0]
+
+nx = 5
+A = 1.0
+ρs, θs, ϕs, qs = uniform_sphere(nx, A)
+#ρs, θs, ϕs, qs = uniform_box(nx, A)
+m, l, old_ps, m_new, l_new, new_ps = choose_p(eps, distance; multipole_only=true, ρs, θs, ϕs, qs)
+
+ax0.plot(eps, new_ps)
+ax0.plot(eps, old_ps)
+ax1.plot(eps, m_new)
+ax1.plot(eps, m)
+ax1.plot(eps, eps, "red")
+
+ax0.set_xscale("log")
+ax0.legend(["uniform source", "worst case"])
+
+ax1.set_xscale("log")
+ax1.set_yscale("log")
+ax1.legend(["uniform source", "worst case", "tolerance"])
+
+tight_layout()
+
+#BSON.@save "choose_p2_d$distance.bson" m l old_ps m_new l_new new_ps
+#savefig("choose_p2_d$distance.png")
diff --git a/scripts/test_local_error.jl b/scripts/test_local_error.jl
new file mode 100644
index 0000000..857dcc8
--- /dev/null
+++ b/scripts/test_local_error.jl
@@ -0,0 +1,213 @@
+using FLOWMath
+using FastMultipole
+using BenchmarkTools
+using LegendrePolynomials
+using PythonPlot
+
+function theta_integral(ρ_max, ΔC, n, ρ̂)
+    stuff = (-ρ_max^2 + ΔC^2 + ρ̂^2) / (2 * ΔC * ρ̂)
+    θs = range(0, stop=acos(stuff), length=100)
+    y2 = zeros(length(θs))
+    for i in eachindex(θs)
+        y2[i] = abs(Plm(cos(θs[i]),n,0)) * sin(θs[i])
+    end
+    return trapz(θs, y2)
+end
+
+function manual_integral(ρ_max, ΔC, n)
+    ρ̂s = range(ΔC-ρ_max, stop=ΔC+ρ_max, length=100)
+    y1 = zeros(length(ρ̂s))
+    for i in eachindex(y1)
+        ρ̂ = ρ̂s[i]
+        y1[i] = theta_integral(ρ_max, ΔC, n, ρ̂) / ρ̂^(n-1)
+    end
+    return trapz(ρ̂s, y1)
+end
+
+function manual_sum(r, ρ_max, A, p, ΔC)
+    println("Manual Sum: p=$p")
+    val = 0.0
+    ρ_max_3 = ρ_max^3
+    for n in p+1:100
+        val += 3*A*r^n/(2*ρ_max_3) * manual_integral(ρ_max, ΔC, n)
+    end
+    return val
+end
+
+function theta_integral2(ρ_max, ΔC, n, ρ̂)
+    stuff = (-ρ_max^2 + ΔC^2 + ρ̂^2) / (2 * ΔC * ρ̂)
+    θs = range(0, stop=acos(stuff), length=100)
+    y2 = zeros(length(θs))
+    for i in eachindex(θs)
+        y2[i] = sin(θs[i])
+    end
+    return trapz(θs, y2)
+end
+
+function manual_integral2(ρ_max, ΔC, n)
+    ρ̂s = range(ΔC-ρ_max, stop=ΔC+ρ_max, length=100)
+    y1 = zeros(length(ρ̂s))
+    for i in eachindex(y1)
+        ρ̂ = ρ̂s[i]
+        y1[i] = theta_integral2(ρ_max, ΔC, n, ρ̂) / ρ̂^(n-1)
+    end
+    return trapz(ρ̂s, y1)
+end
+
+function manual_sum2(r, ρ_max, A, p, ΔC)
+    println("Manual Sum 2: p=$p")
+    val = 0.0
+    ρ_max_3 = ρ_max^3
+    for n in p+1:100
+        val += 3*A*r^n/(2*ρ_max_3) * manual_integral2(ρ_max, ΔC, n)
+    end
+    return val
+end
+
+function manual_integral3(ρ_max, ΔC, n)
+    ρ̂s = range(ΔC-ρ_max, stop=ΔC+ρ_max, length=100)
+    y1 = zeros(length(ρ̂s))
+    for i in eachindex(y1)
+        ρ̂ = ρ̂s[i]
+        y1[i] = (2*ΔC*ρ̂ + ρ_max^2 - ΔC^2 - ρ̂^2) / (2*ΔC*ρ̂^n)
+    end
+    return trapz(ρ̂s, y1)
+end
+
+function manual_sum3(r, ρ_max, A, p, ΔC)
+    println("Manual Sum 3: p=$p")
+    val = 0.0
+    ρ_max_3 = ρ_max^3
+    for n in p+1:100
+        val += 3*A*r^n/(2*ρ_max_3) * manual_integral3(ρ_max, ΔC, n)
+    end
+    return val
+end
+
+function manual_integral4(ρ_max, ΔC, n)
+    val = 0.0
+    x = ΔC + ρ_max
+    val += x^(2-n) / (2-n) + (ρ_max^2 - ΔC^2) * x^(1-n) / (1-n) / (2*ΔC) - x^(3-n) / (2*ΔC*(3-n))
+    x = ΔC - ρ_max
+    val -= x^(2-n) / (2-n) + (ρ_max^2 - ΔC^2) * x^(1-n) / (1-n) / (2*ΔC) - x^(3-n) / (2*ΔC*(3-n))
+    return val
+end
+
+function manual_sum4(r, ρ_max, A, p, ΔC)
+    println("Manual Sum 4: p=$p")
+    val = 0.0
+    ρ_max_3 = ρ_max^3
+    for n in p+1:100
+        val += 3*A*r^n/(2*ρ_max_3) * manual_integral4(ρ_max, ΔC, n)
+    end
+    return val
+end
+
+function η(p, ΔC, ρ_max, r)
+    val = 0.0
+    p < 1 && (val += log((ΔC+ρ_max)/(ΔC-ρ_max)))
+    p < 0 && (val += 2*ρ_max/r)
+    p < -1 && (val += 2*ΔC*ρ_max/(r*r))
+    return val
+end
+
+function manual_sum5(r, ρ_max, A, p, ΔC)
+    println("Manual Sum 5: p=$p")
+    logterm = log((1-r/(ΔC+ρ_max)) / (1-r/(ΔC-ρ_max)))
+    t1 = 0.0
+    for n in 1:p-2
+        t1 += ((r/(ΔC+ρ_max))^n - (r/(ΔC-ρ_max))^n) / n
+    end
+    t2 = 0.0
+    for n in 1:p-1
+        t2 += ((r/(ΔC+ρ_max))^n - (r/(ΔC-ρ_max))^n) / n
+    end
+    t3 = 0.0
+    for n in 1:p-3
+        t3 += ((r/(ΔC+ρ_max))^n - (r/(ΔC-ρ_max))^n) / n
+    end
+
+    val = 0.0
+    val += r * (η(p-1, ΔC, ρ_max, r) + logterm + t1)
+    val += (ρ_max^2-ΔC^2)/(2*ΔC) * (η(p, ΔC, ρ_max, r) + logterm + t2)
+    val += -r^2 / (2*ΔC) * (η(p-2, ΔC, ρ_max, r) + logterm + t3)
+    val *= 3*A*r/(2*ρ_max^3)
+
+    return val
+end
+
+function manual_sum5_rel(r, ρ_max, A, p, ΔC)
+    println("Manual Sum 5 rel: p=$p")
+    logterm = log((1-r/(ΔC+ρ_max)) / (1-r/(ΔC-ρ_max)))
+    t1 = 0.0
+    for n in 1:p-2
+        t1 += ((r/(ΔC+ρ_max))^n - (r/(ΔC-ρ_max))^n) / n
+    end
+    t2 = 0.0
+    for n in 1:p-1
+        t2 += ((r/(ΔC+ρ_max))^n - (r/(ΔC-ρ_max))^n) / n
+    end
+    t3 = 0.0
+    for n in 1:p-3
+        t3 += ((r/(ΔC+ρ_max))^n - (r/(ΔC-ρ_max))^n) / n
+    end
+
+    val = 0.0
+    val += r * (η(p-1, ΔC, ρ_max, r) + logterm + t1)
+    val += (ρ_max^2-ΔC^2)/(2*ΔC) * (η(p, ΔC, ρ_max, r) + logterm + t2)
+    val += -r^2 / (2*ΔC) * (η(p-2, ΔC, ρ_max, r) + logterm + t3)
+    val *= 3*(ΔC-r)*r/(2*ρ_max^3)
+
+    println("\n--- p=$p ---")
+    Γ = 3*r*(ΔC-r)/(2*ρ_max^3)
+    r_max = r
+    ρ2_ΔC2_2ΔC = (ρ_max^2 - ΔC^2) / (2*ΔC)
+    r2_ΔC2 = r^2/(2*ΔC)
+    @show Γ, r_max, ρ2_ΔC2_2ΔC, r2_ΔC2
+    @show logterm + t1
+    @show logterm + t2
+    @show logterm + t3
+
+    return val
+end
+
+function upper_bound_2(r, ρ_max, A, p, ΔC)
+    return A / (ΔC - ρ_max - r) * (r/(ΔC - ρ_max))^(p+1)
+end
+
+
+# manual integral
+r, ρ_max, A, ΔC = 1.0, 3.0, 1.0, 5.0
+ps = collect(0:20)
+
+# integral of abs(Plm(cos)...
+#e_manual = manual_sum.(Ref(r), Ref(ρ_max), Ref(A), ps, Ref(ΔC))
+
+# let abs(Plm(cos(theta))) <= 1; numerical integral
+e2_manual = manual_sum2.(Ref(r), Ref(ρ_max), Ref(A), ps, Ref(ΔC))
+
+# analytic integral
+e3_manual = manual_sum3.(Ref(r), Ref(ρ_max), Ref(A), ps, Ref(ΔC))
+
+# simplification
+e4_manual = manual_sum4.(Ref(r), Ref(ρ_max), Ref(A), ps, Ref(ΔC))
+
+# simplification
+e5_manual = manual_sum5.(Ref(r), Ref(ρ_max), Ref(A), ps, Ref(ΔC))
+e5_manual_rel = manual_sum5_rel.(Ref(r), Ref(ρ_max), Ref(A), ps, Ref(ΔC))
+
+ub2 = upper_bound_2.(Ref(r), Ref(ρ_max), Ref(A), ps, Ref(ΔC))
+
+p = ps[end]
+
+#@btime upper_bound_1($r,$ρ_max,$A,$p,$ΔC)
+#@btime upper_bound_2($r,$ρ_max,$A,$p,$ΔC)
+
+#--- check FastMultipole function ---#
+
+r_min = ΔC - r
+r_max = r
+ρ_min = ΔC - ρ_max
+Pmax = 20
+ε_rel = 0.0
+this_p = FastMultipole.get_P(r_min, r_max, ρ_min, ρ_max, ΔC^2, Pmax, ε_rel, FastMultipole.UniformUnequalSpheres())
diff --git a/scripts/test_multipole_error.jl b/scripts/test_multipole_error.jl
new file mode 100644
index 0000000..e9f8f63
--- /dev/null
+++ b/scripts/test_multipole_error.jl
@@ -0,0 +1,99 @@
+using LegendrePolynomials
+using FLOWMath
+
+function theta_integral(n)
+    θs = range(0, stop=pi, length=100)
+    y2 = zeros(length(θs))
+    for i in eachindex(θs)
+        y2[i] = Plm(cos(θs[i]),n,0) * sin(θs[i])
+    end
+    return trapz(θs, y2)
+end
+
+function manual_integral(ρ_max, n)
+    ρ̂s = range(0, stop=ρ_max, length=100)
+    y1 = zeros(length(ρ̂s))
+    for i in eachindex(y1)
+        ρ̂ = ρ̂s[i]
+        y1[i] = ρ̂^(n+2)
+    end
+    return trapz(ρ̂s, y1) * theta_integral(n)
+end
+
+function manual_sum(r, ρ_max, A, p)
+    println("Manual Sum: p=$p")
+    val = 0.0
+    ρ_max_3 = ρ_max^3
+    rinv = 1 / r
+    for n in p+1:100
+        val += manual_integral(ρ_max, n) * rinv^n
+    end
+    return 3*A/(2*ρ_max_3*r) * val
+end
+
+function theta_integral2(n)
+    return sqrt(2/(2*n+1))
+end
+
+function manual_integral2(ρ_max, n)
+    ρ̂s = range(0, stop=ρ_max, length=100)
+    y1 = zeros(length(ρ̂s))
+    for i in eachindex(y1)
+        ρ̂ = ρ̂s[i]
+        y1[i] = ρ̂^(n+2)
+    end
+    return trapz(ρ̂s, y1) * theta_integral2(n)
+end
+
+function manual_sum2(r, ρ_max, A, p)
+    println("Manual Sum: p=$p")
+    val = 0.0
+    ρ_max_3 = ρ_max^3
+    rinv = 1 / r
+    for n in p+1:100
+        val += manual_integral2(ρ_max, n) * rinv^n
+    end
+    return 3*A/(2*ρ_max_3*r) * val
+end
+
+function theta_integral3(n)
+    return 2.0
+end
+
+function manual_integral3(ρ_max, n)
+    ρ̂s = range(0, stop=ρ_max, length=100)
+    y1 = zeros(length(ρ̂s))
+    for i in eachindex(y1)
+        ρ̂ = ρ̂s[i]
+        y1[i] = ρ̂^(n+2)
+    end
+    return trapz(ρ̂s, y1) * theta_integral3(n)
+end
+
+function manual_sum3(r, ρ_max, A, p)
+    println("Manual Sum: p=$p")
+    val = 0.0
+    ρ_max_3 = ρ_max^3
+    rinv = 1 / r
+    for n in p+1:100
+        val += manual_integral3(ρ_max, n) * rinv^n
+    end
+    return 3*A/(2*ρ_max_3*r) * val
+end
+
+function upper_bound(r, ρ_max, A, p)
+    t1 = 3*A / (p+4)
+    t2 = sqrt(1/(r*p+6))
+    t3 = 1 / (r - ρ_max)
+    t4 = (ρ_max / r)^(p+1)
+    return t1*t2*t3*t4
+end
+
+r, ρ_max, A = 4.0, 3.0, 1.0
+ps = collect(0:10)
+
+e_manual = manual_sum.(Ref(r), Ref(ρ_max), Ref(A), ps)
+e2_manual = manual_sum2.(Ref(r), Ref(ρ_max), Ref(A), ps)
+e3_manual = manual_sum3.(Ref(r), Ref(ρ_max), Ref(A), ps)
+e_ub = upper_bound.(Ref(r), Ref(ρ_max), Ref(A), ps)
+
diff --git a/src/FastMultipole.jl b/src/FastMultipole.jl
index f7e0595..a6e4011 100644
--- a/src/FastMultipole.jl
+++ b/src/FastMultipole.jl
@@ -14,6 +14,12 @@ const ONE_THIRD = 1/3
 const DEBUG = Array{Bool,0}(undef)
 DEBUG[] = false
 
+const WARNING_FLAG_LEAF_SIZE = Array{Bool,0}(undef)
+WARNING_FLAG_LEAF_SIZE[] = true
+
+const WARNING_FLAG_PMAX = Array{Bool,0}(undef)
+WARNING_FLAG_PMAX[] = true
+
 # multithreading parameters
 #const MIN_NPT_B2M = 1
 #const MIN_NPT_M2M = 1
diff --git a/src/fmm.jl b/src/fmm.jl
index 82152aa..38af391 100644
--- a/src/fmm.jl
+++ b/src/fmm.jl
@@ -964,21 +964,8 @@ function fmm!(target_tree::Tree, target_systems, source_tree::Tree, source_syste
 
             nearfield && nearfield_multithread!(target_systems, target_tree.branches, derivatives_switches, source_systems, source_tree.branches, direct_list, n_threads)
             upward_pass && upward_pass_multithread!(source_tree.branches, source_systems, Pmax, lamb_helmholtz, source_tree.levels_index, source_tree.leaf_index, n_threads)
-            if DEBUG[]
-            println("\n\nMultithreaded Tests:")
-            println("\tpost upward pass:")
-            @show source_tree.branches[end].multipole_expansion
-        end
             horizontal_pass && length(m2l_list) > 0 && horizontal_pass_multithread!(target_tree.branches, source_tree.branches, m2l_list, lamb_helmholtz, Pmax, n_threads)
-            if DEBUG[]
-            println("\tpost horizontal pass")
-            @show target_tree.branches[end].local_expansion
-        end
             downward_pass && downward_pass_multithread!(target_tree.branches, target_systems, derivatives_switches, Pmax, lamb_helmholtz, target_tree.levels_index, target_tree.leaf_index, n_threads)
-            if DEBUG[]
-            println("\tpost downward pass")
-            @show target_tree.branches[end].local_expansion
-        end
 
         end
 
diff --git a/src/interaction_list.jl b/src/interaction_list.jl
index b32545f..2a87236 100644
--- a/src/interaction_list.jl
+++ b/src/interaction_list.jl
@@ -12,16 +12,24 @@ function build_interaction_lists(target_branches, source_branches, source_leaf_i
     return m2l_list, direct_list
 end
 
-function get_P(r_min, r_max, ρ_min, ρ_max, expansion_order::Int, error_method::ErrorMethod)
+function get_P(r_min, r_max, ρ_min, ρ_max, ΔC2, expansion_order::Int, error_method::ErrorMethod)
     return expansion_order
 end
 
-function get_P(r_min, r_max, ρ_min, ρ_max, ::Dynamic{PMAX,RTOL}, error_method::ErrorMethod) where {PMAX,RTOL}
-    return get_P(r_min, r_max, ρ_min, ρ_max, PMAX, RTOL, error_method)
+function get_P(r_min, r_max, ρ_min, ρ_max, ΔC2, ::Dynamic{PMAX,RTOL}, error_method::ErrorMethod) where {PMAX,RTOL}
+    return get_P(r_min, r_max, ρ_min, ρ_max, ΔC2, PMAX, RTOL, error_method)
 end
 
+@inline function warn_Pmax()
+    if WARNING_FLAG_PMAX[]
+        @warn "error tolerance not met with dynamic expansion order; try increasing `Pmax`"
+        WARNING_FLAG_PMAX[] = false
+    end
+end
+
+
 """
-    get_P(r_min, r_max, ρ_min, ρ_max, Pmax, ε_rel, error_method)
+    get_P(r_min, r_max, ρ_min, ρ_max, ΔC2, Pmax, ε_rel, error_method)
 
 Returns the smallest expansion order not greater than `Pmax` and satisfying the specified relative error tolerance.
 
@@ -31,6 +39,7 @@ Returns the smallest expansion order not greater than `Pmax` and satisfying the
 * `r_max::Float64`: distance from the local expansion to the farthest target
 * `ρ_min::Float64`: distance from the local expansion to the closest source
 * `ρ_max::Float64`: distance from the multipole expansion to the farthest source
+* `ΔC2::Float64`: distance squared between multipole and local centers
 * `Pmax::Int64`: maximum allowable expansion order
 * `ε_rel::Float64`: relative error tolerance
 * `error_method::ErrorMethod`: type used to dispatch on the desired error method
@@ -40,7 +49,7 @@ Returns the smallest expansion order not greater than `Pmax` and satisfying the
 * `P::Int`: the smallest expansion order to satisfy the error tolerance
 
 """
-function get_P(r_min, r_max, ρ_min, ρ_max, Pmax, ε_rel, ::Union{EqualSpheres, UnequalSpheres, UnequalBoxes})
+function get_P(r_min, r_max, ρ_min, ρ_max, ΔC2, Pmax, ε_rel, ::Union{EqualSpheres, UnequalSpheres, UnequalBoxes})
     ρ_max_over_r_min = ρ_max / r_min
     r_max_over_ρ_min = r_max / ρ_min
     t1 = ρ_max_over_r_min
@@ -51,24 +60,89 @@ function get_P(r_min, r_max, ρ_min, ρ_max, Pmax, ε_rel, ::Union{EqualSpheres,
         t2 *= r_max_over_ρ_min
     end
 
-    @warn "error tolerance not met with dynamic expansion order; try increasing `Pmax`"
+    warn_Pmax()
 
     return Pmax
 end
 
-function get_P(r_min, r_max, ρ_min, ρ_max, Pmax, ε_rel, ::Union{UniformUnequalSpheres, UniformUnequalBoxes})
+function get_P(r_min, r_max, ρ_min, ρ_max, ΔC2, Pmax, ε_rel, ::Union{UniformUnequalSpheres, UniformUnequalBoxes})
 
     ρ_max_over_r_min = ρ_max / r_min
     r_max_over_ρ_min = r_max / ρ_min
-    t1 = ρ_max / (r_min - ρ_max)
-    t2 = r_max_over_ρ_min
-    for P in 0:Pmax-1
-        t1 * sqrt(9 / ( (P+4)*(P+4)*(4*P+6) )) + t2 < ε_rel && (return P)
-        t1 *= ρ_max_over_r_min
-        t2 *= r_max_over_ρ_min
+
+    # multipole error
+    ε_multipole = 1.5 * ρ_max / (r_min - ρ_max)
+
+    # local error
+    ρ_max2 = ρ_max * ρ_max
+    Γ = 3 * r_max * r_min / (2 * ρ_max2 * ρ_max)
+
+    # distance from multipole center to point closest to the local expansion
+    ΔC = sqrt(ΔC2)
+    ρ_max_line = ΔC - ρ_min
+
+    # distance from local center to farthest multipole point
+    ΔC_plus = ρ_min + ρ_max_line + ρ_max_line
+
+    # distance from local center to nearest multipole point
+    ΔC_minus = ρ_min
+
+    t_plus = r_max / ΔC_plus
+    t_minus = r_max / ΔC_minus
+    L = log((1-t_plus)/(1-t_minus))
+    ΔC2_inv = 1/(2*ΔC)
+    ρ2_ΔC2_2ΔC = (ρ_max2 - ΔC2) * ΔC2_inv
+    r2_ΔC2 = r_max * r_max * ΔC2_inv
+
+    # recursively tracked quantities
+    Lζ_sum_pm1 = L
+    Lζ_sum_pm2 = L
+    Lζ_sum_pm3 = L
+    t_plus_n = t_plus
+    t_minus_n = t_minus
+    η_p = log(ΔC_plus / ΔC_minus)
+    r_inv = 1/r_max
+    η_pm1 = η_p + (ΔC_plus - ΔC_minus) * r_inv
+    η_pm2 = η_pm1 + (ΔC_plus*ΔC_plus - ΔC_minus*ΔC_minus) * r_inv * r_inv * 0.5
+
+    #--- test p=0 ---#
+
+    # test error
+    ε_local = Γ * (r_max * (η_pm1 + Lζ_sum_pm2) + ρ2_ΔC2_2ΔC * (η_p + Lζ_sum_pm1) - r2_ΔC2 * (η_pm2 + Lζ_sum_pm3))
+    ε_multipole * 0.25 + ε_local < ε_rel && (return 0)
+
+    # recurse multipole
+    ε_multipole *= ρ_max_over_r_min
+
+    # recurse local
+    η_pm2 = η_pm1
+    η_pm1 = η_p
+    η_p = zero(r_min)
+
+    #--- test p>0 ---#
+
+    for P in 1:Pmax-1
+        # get local error
+        ε_local = Γ * (r_max * (η_pm1 + Lζ_sum_pm2) + ρ2_ΔC2_2ΔC * (η_p + Lζ_sum_pm1) - r2_ΔC2 * (η_pm2 + Lζ_sum_pm3))
+
+        # check error
+        ε_multipole / (P+4) + ε_local < ε_rel && (return P)
+
+        # recurse multipole
+        ε_multipole *= ρ_max_over_r_min
+
+        # recurse local
+        Lζ_sum_pm3 = Lζ_sum_pm2
+        Lζ_sum_pm2 = Lζ_sum_pm1
+        Lζ_sum_pm1 += (t_plus_n - t_minus_n) / P
+        t_plus_n *= t_plus
+        t_minus_n *= t_minus
+        η_pm2 = η_pm1
+        η_pm1 = η_p
+        η_p = zero(r_min)
     end
 
-    @warn "error tolerance not met with dynamic expansion order; try increasing `Pmax`"
+    warn_Pmax()
 
     return Pmax
 end
@@ -153,7 +227,7 @@ function build_interaction_lists!(m2l_list, direct_list, i_target, j_source, tar
     if separation_distance_squared * multipole_threshold * multipole_threshold > summed_radii * summed_radii
     #if ρ_max <= multipole_threshold * r_min && r_max <= multipole_threshold * ρ_min # exploring a new criterion
         if ff
-            P = get_P(r_min, r_max, ρ_min, ρ_max, expansion_order, error_method)
+            P = get_P(r_min, r_max, ρ_min, ρ_max, separation_distance_squared, expansion_order, error_method)
             push!(m2l_list, (i_target, j_source, P))
         end
     elseif source_branch.n_branches == target_branch.n_branches == 0 # both leaves
diff --git a/src/tree.jl b/src/tree.jl
index e738a7c..90c8c94 100644
--- a/src/tree.jl
+++ b/src/tree.jl
@@ -1,6 +1,3 @@
-const WARNING_FLAG_LEAF_SIZE = Array{Bool,0}(undef)
-WARNING_FLAG_LEAF_SIZE[] = true
-
 #####
 ##### tree constructor
 #####