From 1c50638a654be886b5cebab01ac90c9db6cc25f3 Mon Sep 17 00:00:00 2001 From: Claus Fieker Date: Fri, 7 Feb 2025 08:37:09 +0100 Subject: [PATCH 01/11] sanitize some interfaces --- src/matrix.jl | 93 ++++++++++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 45 deletions(-) diff --git a/src/matrix.jl b/src/matrix.jl index d33c8691d..e39d832be 100644 --- a/src/matrix.jl +++ b/src/matrix.jl @@ -422,7 +422,7 @@ function _det(a::fpMatrix) return r end -function map_entries!(a::fpMatrix, k::Nemo.fpField, A::ZZMatrix) +function map_entries!(k::Nemo.fpField, a::fpMatrix, A::ZZMatrix) ccall((:nmod_mat_set_mod, Nemo.libflint), Cvoid, (Ref{fpMatrix}, UInt), a, k.n) ccall((:fmpz_mat_get_nmod_mat, Nemo.libflint), Cvoid, (Ref{fpMatrix}, Ref{ZZMatrix}), a, A) a.base_ring = k # exploiting that the internal repr is the indep of char @@ -433,22 +433,11 @@ function Base.copy!(A::ZZMatrix, B::ZZMatrix) ccall((:fmpz_mat_set, Nemo.libflint), Cvoid, (Ref{ZZMatrix}, Ref{ZZMatrix}), A, B) end -function Nemo.sub!(A::ZZMatrix, m::Int) +function sub!(A::ZZMatrix, B::ZZMatrix, m::Int) for i=1:nrows(A) A_p = Nemo.mat_entry_ptr(A, i, i) - sub!(A_p, A_p, m) - end - return A -end - -# No allocations; also faster than *= -function Nemo.mul!(A::ZZMatrix, m::ZZRingElem) - for i=1:nrows(A) - A_p = Nemo.mat_entry_ptr(A, i, 1) - for j=1:ncols(A) - mul!(A_p, A_p, m) - A_p += sizeof(Int) - end + B_p = Nemo.mat_entry_ptr(B, i, i) + sub!(A_p, B_p, m) end return A end @@ -495,11 +484,12 @@ function is_unimodular(A::ZZMatrix; algorithm=:auto) A_mod_p = identity_matrix(Nemo.Native.GF(2), nrows(A)) while nbits(M) <= target_bitsize_modulus @vprint(:UnimodVerif,2,".") - p = next_prime(p + rand(1:2^10)) # increment by a random step to a prime + p = next_prime(p + rand(1:2^10), false) # increment by a random step to a prime ZZmodP = Nemo.Native.GF(p; cached = false, check = false) - map_entries!(A_mod_p, ZZmodP, A) - @vtime :UnimodVerif 2 det_mod_p = Int(_det(A_mod_p)) - if det_mod_p > p/2 + map_entries!(ZZmodP, A_mod_p, A) + @vtime :UnimodVerif 2 det_mod_p = Int(_det(A_mod_p)) + p2 = p>>1 + if det_mod_p > p2 det_mod_p -= p end if abs(det_mod_p) != 1 @@ -511,12 +501,11 @@ function is_unimodular(A::ZZMatrix; algorithm=:auto) det_mod_m = det_mod_p mul!(M, M, p) end - return is_unimodular_given_det_mod_m(A, det_mod_m, M; algorithm=algorithm) + return _is_unimodular_given_det_mod_m(A, det_mod_m, M; algorithm) end #function -# THIS FUNCTION NOT OFFICIALLY DOCUMENTED -- not intended for public use. -function is_unimodular_given_det_mod_m(A::ZZMatrix, det_mod_m::Int, M::ZZRingElem; algorithm=:auto) +function _is_unimodular_given_det_mod_m(A::ZZMatrix, det_mod_m::Int, M::ZZRingElem; algorithm=:auto) # This UI is convenient for our sophisticated determinant algorithm. # We estimate cost of computing det by CRT and cost of doing Pauderis-Storjohann # unimodular verification then call whichever is likely to be faster @@ -533,7 +522,7 @@ function is_unimodular_given_det_mod_m(A::ZZMatrix, det_mod_m::Int, M::ZZRingEle @vprintln(:UnimodVerif,1,"is_unimodular_given_det_mod_m starting") if algorithm == :pauderis_storjohann @vprintln(:UnimodVerif,1,"User specified Pauderis_Storjohann --> delegating") - return is_unimodular_Pauderis_Storjohann_Hensel(A) + return _is_unimodular_Pauderis_Storjohann_Hensel(A) end n = ncols(A) Hrow = hadamard_bound2(A) @@ -557,7 +546,7 @@ function is_unimodular_given_det_mod_m(A::ZZMatrix, det_mod_m::Int, M::ZZRingEle if Estimate_PS < Estimate_CRT #=&& Space_estimate_PS < 2^32=# # Expected RAM requirement is not excessive, and # Pauderis-Storjohann is likely faster, so delegate to UniCertZ_hensel - return is_unimodular_Pauderis_Storjohann_Hensel(A) + return _is_unimodular_Pauderis_Storjohann_Hensel(A) end end if algorithm == :CRT @@ -578,13 +567,13 @@ function is_unimodular_given_det_mod_m(A::ZZMatrix, det_mod_m::Int, M::ZZRingEle end # advance to another prime which does not divide M: while true - p = next_prime(p+rand(1:stride)) + p = next_prime(p+rand(1:stride), false) if !is_divisible_by(M,p) break end end ZZmodP = Nemo.Native.GF(p; cached = false, check = false) - map_entries!(A_mod_p, ZZmodP, A) + map_entries!(ZZmodP, A_mod_p, A) det_mod_p = _det(A_mod_p) if det_mod_p > p/2 det_mod_p -= p @@ -592,19 +581,24 @@ function is_unimodular_given_det_mod_m(A::ZZMatrix, det_mod_m::Int, M::ZZRingEle if abs(det_mod_p) != 1 || det_mod_m != det_mod_p return false # obviously not unimodular end - M *= p + mul!(M, M, p) end return true end -# THIS FUNCTION NOT OFFICIALLY DOCUMENTED -- not intended for public use. -# Pauderis-Storjohann unimodular verification/certification. -# We use Hensel lifting to obtain the modular inverse B0 -# Seems to be faster than the CRT prototype (not incl. here) -# VERBOSITY via :UnimodVerif -function is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix) +#DoublePlusOne complex ############################## + +#TODO: Hensel is slower than CRT if modulus is small(ish) + +function _PSH_init(A::ZZMatrix) + #computes a + # modulus (X) in the paper + # the inverse of A mod x + # the exponent target for the lifting. + n = nrows(A) - # assume ncols == n + @assert ncols(A) == n + EntrySize = maximum(abs, A) entry_size_bits = maximum(nbits, A) e = max(16, 2+ceil(Int, 2*log2(n)+entry_size_bits)) @@ -618,8 +612,7 @@ function is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix) @vprintln(:UnimodVerif,1,"Hensel prime is p = $(p)") m = ZZ(p) ZZmodP = Nemo.Native.GF(p) - MatModP = matrix_space(ZZmodP,n,n) - B0 = lift(inv(MatModP(A))) + B0 = lift(inv(map_entries(ZZmodP, A))) #produces pos. lift mod_sym!(B0, m) delta = identity_matrix(base_ring(A), n) A_mod_m = identity_matrix(base_ring(A), n) @@ -628,12 +621,12 @@ function is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix) @vprintln(:UnimodVerif,2,"Loop 1: nbits(m) = $(nbits(m))"); copy!(A_mod_m, A) mm = m^2 - mod_sym!(A_mod_m,mm) + mod_sym!(A_mod_m, mm) @vtime :UnimodVerif 2 mul!(delta, A_mod_m, B0) - sub!(delta, 1) + sub!(delta, delta, 1) divexact!(delta, m) # to make matrix product in line below cheaper @vtime :UnimodVerif 2 mul!(A_mod_m, B0, delta) - mul!(A_mod_m, m) + mul!(A_mod_m, A_mod_m, m) sub!(B0, B0, A_mod_m) m = mm mod_sym!(B0, m) @@ -648,11 +641,23 @@ function is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix) k = 2+nbits(ceil(Int,k/log2(m))) @vprintln(:UnimodVerif,1,"Stage 2: max num iters is k=$(k)") - ZZmodM,_ = residue_ring(ZZ,m; cached = false) # m is probably NOT prime + return m, B0, k +end + + +# THIS FUNCTION NOT OFFICIALLY DOCUMENTED -- not intended for public use. +# Pauderis-Storjohann unimodular verification/certification. +# We use Hensel lifting to obtain the modular inverse B0 +# Seems to be faster than the CRT prototype (not incl. here) +# VERBOSITY via :UnimodVerif +function _is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix) + n = nrows(A) + m, B0, k = _PSH_init(A) + ## @assert is_one(lift(MatModM(B0*A))) # Originally: R = (I-A*B0)/m R = A*B0 - sub!(R, 1) #this is -R, but is_zero test is the same, and the loop starts by squaring + sub!(R, R, 1) #this is -R, but is_zero test is the same, and the loop starts by squaring divexact!(R, m) if is_zero(R) return true @@ -661,9 +666,7 @@ function is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix) #ZZMatrix and ZZModMatrix are the same type. The modulus is #always passed in as an extra argument when needed... #mul! for ZZModMatrix is mul, followed by reduce. - - B0_modm = deepcopy(B0) - mod_sym!(B0_modm, m) + #thus the code is not using ZZModMatrices at all... R_bar = zero_matrix(ZZ, n, n) M = zero_matrix(ZZ, n, n) @@ -675,7 +678,7 @@ function is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix) mul!(R_bar, R, R) #Next 3 lines do: M = lift(B0_modm*MatModM(R_bar)); mod_sym!(R_bar, m) - mul!(M, B0_modm, R_bar) + mul!(M, B0, R_bar) mod_sym!(M, m) # Next 3 lines do: R = (R_bar - A*M)/m; but with less memory allocation mul!(R, A, M) From 54f71dc3bb515b5d66de5c4a56db9896ce8538c4 Mon Sep 17 00:00:00 2001 From: Claus Fieker Date: Fri, 7 Feb 2025 15:05:30 +0100 Subject: [PATCH 02/11] move the unimodularity around and add solvers started ZZMatrix-linalg with - is_unimodular - dixon/ solve/ init - DoublePlusOne solver added some ZZ/ZZMatrix functions --- src/Nemo.jl | 1 + src/ZZMatrix-linalg.jl | 978 +++++++++++++++++++++++++++++++++++ src/flint/fmpz.jl | 10 + src/flint/fmpz_mat.jl | 23 + src/matrix.jl | 295 ----------- test/Nemo-test.jl | 2 + test/ZZMatrix-linalg-test.jl | 30 ++ 7 files changed, 1044 insertions(+), 295 deletions(-) create mode 100644 src/ZZMatrix-linalg.jl create mode 100644 test/ZZMatrix-linalg-test.jl diff --git a/src/Nemo.jl b/src/Nemo.jl index 4b3fef149..e283d7b1a 100644 --- a/src/Nemo.jl +++ b/src/Nemo.jl @@ -433,6 +433,7 @@ include("Rings.jl") include("matrix.jl") include("poly.jl") +include("ZZMatrix-linalg.jl") include("Infinity.jl") diff --git a/src/ZZMatrix-linalg.jl b/src/ZZMatrix-linalg.jl new file mode 100644 index 000000000..3ed7444bf --- /dev/null +++ b/src/ZZMatrix-linalg.jl @@ -0,0 +1,978 @@ +################################################################## +## Functions related to unimodularity verification +################################################################## + +# ***SOURCE ALGORITHM*** +# Refined implementation of method from +# Authors: C. Pauderis + A. Storjohann +# Title: Detrministic Unimodularity Certification +# Where: Proc ISSAC 2012, pp. 281--287 +# See also: thesis of Colton Pauderis, 2013, Univ of Waterloo +# Deterministic Unimodularity Certification and Applications for Integer Matrices + + +# add_verbosity_scope(:UnimodVerif) -- see func __init__ in src/Nemo.jl + +####################################################### +# Low-level auxiliary functions -- should be elsewhere? +####################################################### + +function _det(a::fpMatrix) + a.r < 10 && return lift(det(a)) + #_det avoids a copy: det is computed destructively + r = ccall((:_nmod_mat_det, Nemo.libflint), UInt, (Ref{fpMatrix}, ), a) + return r +end + +function map_entries!(k::Nemo.fpField, a::fpMatrix, A::ZZMatrix) + ccall((:nmod_mat_set_mod, Nemo.libflint), Cvoid, (Ref{fpMatrix}, UInt), a, k.n) + ccall((:fmpz_mat_get_nmod_mat, Nemo.libflint), Cvoid, (Ref{fpMatrix}, Ref{ZZMatrix}), a, A) + a.base_ring = k # exploiting that the internal repr is the indep of char + return a +end + +@doc raw""" + is_unimodular(A::ZZMatrix) + is_unimodular(A::ZZMatrix; algorithm=:auto) + +Determine whether `A` is unimodular, i.e. whether `abs(det(A)) == 1`. +The method used is either that of Pauderis--Storjohann or using CRT; +the choice is made based on cost estimates for the two approaches. + +The kwarg `algorithm` may be specified to be `:CRT` or `:pauderis_storjohann` +to ensure that the corresponding underlying algorithm is used (after a quick +initial unimodularity check). `:CRT` is better if the matrix is unimodular +and has an inverse containing large entries (or if it is not unimodular); +`:pauderis_storjohann` is asymptotically faster when the matrix is unimodular +and its inverse does not contain large entries, but it requires more space +for intermediate expressions. +""" +function is_unimodular(A::ZZMatrix; algorithm=:auto) + # Call this function when no extra info about the matrix is available. + # It does a preliminary check that det(A) is +/-1 modulo roughly 2^100. + # If so, then delegate the complete check to is_unimodular_given_det_mod_m + if !is_square(A) + throw(ArgumentError("Matrix must be square")) + end + if !(algorithm in [:auto, :CRT, :pauderis_storjohann]) + throw(ArgumentError("algorithm must be one of [:CRT, :pauderis_storjohann, :auto]")) + end + # Deal with two trivial cases + if nrows(A) == 0 + return true + end + if nrows(A) == 1 + return (abs(A[1,1]) == 1); + end + # Compute det mod M -- may later include some more primes + target_bitsize_modulus = 100 # magic number -- should not be too small! + @vprintln(:UnimodVerif,1,"Quick first check: compute det by crt up to modulus with about $(target_bitsize_modulus) bits") + p::Int = 2^20 # start point of primes for initial CRT trial -- det with modulus >= 2^22 is much slower + M = ZZ(1) + det_mod_m = 0 # 0 means uninitialized, o/w value is 1 or -1 + A_mod_p = identity_matrix(Nemo.Native.GF(2), nrows(A)) + while nbits(M) <= target_bitsize_modulus + @vprint(:UnimodVerif,2,".") + p = next_prime(p + rand(1:2^10), false) # increment by a random step to a prime + ZZmodP = Nemo.Native.GF(p; cached = false, check = false) + map_entries!(ZZmodP, A_mod_p, A) + @vtime :UnimodVerif 2 det_mod_p = Int(_det(A_mod_p)) + p2 = p>>1 + if det_mod_p > p2 + det_mod_p -= p + end + if abs(det_mod_p) != 1 + return false # obviously not unimodular + end + if det_mod_m != 0 && det_mod_m != det_mod_p + return false # obviously not unimodular + end + det_mod_m = det_mod_p + mul!(M, M, p) + end + return _is_unimodular_given_det_mod_m(A, det_mod_m, M; algorithm) +end #function + + +function _is_unimodular_given_det_mod_m(A::ZZMatrix, det_mod_m::Int, M::ZZRingElem; algorithm=:auto) + # This UI is convenient for our sophisticated determinant algorithm. + # We estimate cost of computing det by CRT and cost of doing Pauderis-Storjohann + # unimodular verification then call whichever is likely to be faster + # (but we avoid P-S if the space estimate is too large). + # M is a modulus satisfying M > 2^30; + # det_mod_m is det(A) mod M [!not checked!]: it is either +1 or -1. + is_square(A) || throw(ArgumentError("Matrix must be square")) + abs(det_mod_m) == 1 || throw(ArgumentError("det_mod_m must be +1 or -1")) + M > 2^30 || throw(ArgumentError("modulus must be greater than 2^30")) + (algorithm in [:auto, :CRT, :pauderis_storjohann]) || throw(ArgumentError("algorithm must be one of [:CRT, :pauderis_storjohann, :auto]")) + # Deal with two trivial cases -- does this make sense here??? + nrows(A) == 0 && return true + nrows(A) == 1 && return (abs(A[1,1]) == 1) + @vprintln(:UnimodVerif,1,"is_unimodular_given_det_mod_m starting") + if algorithm == :pauderis_storjohann + @vprintln(:UnimodVerif,1,"User specified Pauderis_Storjohann --> delegating") + return _is_unimodular_Pauderis_Storjohann_Hensel(A) + end + n = ncols(A) + Hrow = hadamard_bound2(A) + Hcol = hadamard_bound2(transpose(A)) + DetBoundSq = min(Hrow, Hcol) + Hbits = 1+div(nbits(DetBoundSq),2) + @vprintln(:UnimodVerif,1,"Hadamard bound in bits $(Hbits)") + if algorithm == :auto + # Estimate whether better to "climb out" with CRT or use Pauderis-Storjohann + CRT_speed_factor = 20.0 # scale factor is JUST A GUESS !!! (how much faster CRT is compared to P-S) + total_entry_size = sum(nbits,A) + Estimate_CRT = ceil(((Hbits - nbits(M))*(2.5+total_entry_size/n^3))/CRT_speed_factor) # total_entry_size/n is proportional to cost of reducing entries mod p + EntrySize = maximum(abs, A) + entry_size_bits = maximum(nbits, A) + @vprintln(:UnimodVerif,1,"entry_size_bits = $(entry_size_bits) log2(EntrySize) = $(ceil(log2(EntrySize)))") + e = max(16, Int(2+ceil(2*log2(n)+log2(EntrySize)))) # see Condition in Fig 1, p.284 of Pauderis-Storjohann + Estimate_PS = ceil(e*log(e)) # assumes soft-linear ZZ arith + @vprintln(:UnimodVerif,1,"Est_CRT = $(Estimate_CRT) and Est_PS = $(Estimate_PS)") + Space_estimate_PS = ZZ(n)^2 * e # nbits of values, ignoring mem mgt overhead + @vprintln(:UnimodVerif,1,"Space est PS: $(Space_estimate_PS) [might be bytes]") + if Estimate_PS < Estimate_CRT #=&& Space_estimate_PS < 2^32=# + # Expected RAM requirement is not excessive, and + # Pauderis-Storjohann is likely faster, so delegate to UniCertZ_hensel + return _is_unimodular_Pauderis_Storjohann_Hensel(A) + end + end + if algorithm == :CRT + @vprintln(:UnimodVerif,1,"User specified CRT --> delegating") + end + @vprintln(:UnimodVerif,1,"UnimodularVerification: CRT loop") + # Climb out with CRT: + # in CRT loop start with 22 bit primes; if we reach threshold (empirical), jump to much larger primes. + p = 2^21; stride = 2^10; threshold = 5000000; + A_mod_p = zero_matrix(Nemo.Native.GF(2), nrows(A), ncols(A)) + while nbits(M) < Hbits + @vprint(:UnimodVerif,2,".") + # Next lines increment p (by random step up to stride) to a prime not dividing M + if p > threshold && p < threshold+stride + @vprint(:UnimodVerif,2,"!!") + p = 2^56; stride = 2^25; # p = 2^56 is a good choice on my standard 64-bit machine + continue + end + # advance to another prime which does not divide M: + while true + p = next_prime(p+rand(1:stride), false) + if !is_divisible_by(M,p) + break + end + end + ZZmodP = Nemo.Native.GF(p; cached = false, check = false) + map_entries!(ZZmodP, A_mod_p, A) + det_mod_p = _det(A_mod_p) + if det_mod_p > p/2 + det_mod_p -= p + end + if abs(det_mod_p) != 1 || det_mod_m != det_mod_p + return false # obviously not unimodular + end + mul!(M, M, p) + end + return true +end + +#DoublePlusOne complex ############################## + +#TODO: Hensel is slower than CRT if modulus is small(ish) + +function _PSH_init(A::ZZMatrix) + #computes a + # modulus (X) in the paper + # the inverse of A mod x + # the exponent target for the lifting. + + n = nrows(A) + @assert ncols(A) == n + + EntrySize = maximum(abs, A) + entry_size_bits = maximum(nbits, A) + e = max(16, 2+ceil(Int, 2*log2(n)+entry_size_bits)) + ModulusLWB = ZZ(2)^e + @vprintln(:UnimodVerif,1,"ModulusLWB is 2^$(e)") + # Now look for prime: about 25 bits, and such that p^(2^sthg) is + # just slightly bigger than ModulusLWB. Bit-size of prime matters little. + root_order = exp2(ceil(log2(e/25))) + j = ceil(exp2(e/root_order)) + p = next_prime(Int(j)) + @vprintln(:UnimodVerif,1,"Hensel prime is p = $(p)") + m = ZZ(p) + ZZmodP = Nemo.Native.GF(p) + B0 = lift(inv(map_entries(ZZmodP, A))) #produces pos. lift + mod_sym!(B0, m) + delta = identity_matrix(base_ring(A), n) + A_mod_m = identity_matrix(base_ring(A), n) + @vprintln(:UnimodVerif,1,"Starting stage 1 lifting") + while (m < ModulusLWB) + @vprintln(:UnimodVerif,2,"Loop 1: nbits(m) = $(nbits(m))"); + copy!(A_mod_m, A) + mm = m^2 + mod_sym!(A_mod_m, mm) + @vtime :UnimodVerif 2 mul!(delta, A_mod_m, B0) + sub!(delta, delta, 1) + divexact!(delta, m) # to make matrix product in line below cheaper + @vtime :UnimodVerif 2 mul!(A_mod_m, B0, delta) + mul!(A_mod_m, A_mod_m, m) + sub!(B0, B0, A_mod_m) + m = mm + mod_sym!(B0, m) + end + @vprintln(:UnimodVerif,1,"Got stage 1 modular inverse: modulus has $(nbits(m)) bits; value is $(p)^$(Int(round(log2(m)/log2(p))))") + + # Hadamard bound brings little benefit; anyway, almost never lift all the way +### H = min(hadamard_bound2(A), hadamard_bound2(transpose(A))) +### println("nbits(H) = $(nbits(H))"); + # Compute max num iters in k + k = (n-1)*(entry_size_bits+log2(n)/2) - (2*log2(n) + entry_size_bits -1) + k = 2+nbits(ceil(Int,k/log2(m))) + @vprintln(:UnimodVerif,1,"Stage 2: max num iters is k=$(k)") + + return m, B0, k +end + + +# THIS FUNCTION NOT OFFICIALLY DOCUMENTED -- not intended for public use. +# Pauderis-Storjohann unimodular verification/certification. +# We use Hensel lifting to obtain the modular inverse B0 +# Seems to be faster than the CRT prototype (not incl. here) +# VERBOSITY via :UnimodVerif +function _is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix) + n = nrows(A) + m, B0, k = _PSH_init(A) + + ## @assert is_one(lift(MatModM(B0*A))) + # Originally: R = (I-A*B0)/m + R = A*B0 + sub!(R, R, 1) #this is -R, but is_zero test is the same, and the loop starts by squaring + divexact!(R, m) + if is_zero(R) + return true + end + + #ZZMatrix and ZZModMatrix are the same type. The modulus is + #always passed in as an extra argument when needed... + #mul! for ZZModMatrix is mul, followed by reduce. + #thus the code is not using ZZModMatrices at all... + + R_bar = zero_matrix(ZZ, n, n) + M = zero_matrix(ZZ, n, n) + + @vprintln(:UnimodVerif,1,"Starting stage 2 lifting") + for i in 0:k-1 + @vprintln(:UnimodVerif,1,"Stage 2 loop: i=$(i)") + + mul!(R_bar, R, R) + #Next 3 lines do: M = lift(B0_modm*MatModM(R_bar)); + mod_sym!(R_bar, m) + mul!(M, B0, R_bar) + mod_sym!(M, m) + # Next 3 lines do: R = (R_bar - A*M)/m; but with less memory allocation + mul!(R, A, M) + sub!(R, R_bar, R) + divexact!(R, m) + if is_zero(R) + return true + end + end + return false +end + +#adaptive rational_reconstruction: if solution is unbalanced with +#denominator smaller than numerator +function induce_rational_reconstruction(a::ZZMatrix, b::ZZRingElem) + A = similar(a) + + T = ZZRingElem(Val(:raw)) + n = ZZRingElem(Val(:raw)) + d = ZZRingElem(Val(:raw)) + bN = ZZRingElem(Val(:raw)) + bD = ZZRingElem(Val(:raw)) + D = ZZRingElem(1) + for i=1:nrows(a) + a_ptr = Nemo.mat_entry_ptr(a, i, 1) + A_ptr = Nemo.mat_entry_ptr(A, i, 1) + for j=1:ncols(a) + Nemo.set!(T, a_ptr) + Nemo.mul!(T, T, D) + Nemo.mod!(T, T, b) + fl = ratrec!(n, d, T, b, bN, bD) + fl || return fl, A, D + if !isone(d) + mul!(D, D, d) + Nemo.mul!(A, A, d) + end + Nemo.set!(A_ptr, n) + + a_ptr += sizeof(ZZRingElem) + A_ptr += sizeof(ZZRingElem) + end + end + Nemo._fmpz_clear_fn(bN) + Nemo._fmpz_clear_fn(bD) + Nemo._fmpz_clear_fn(n) + Nemo._fmpz_clear_fn(d) + Nemo._fmpz_clear_fn(T) + + return true, A, D +end + +#output sensitive rational_reconstruction, in particular if +#numerator is larger than den +function ratrec!(n::ZZRingElem, d::ZZRingElem, a::ZZRingElem, b::ZZRingElem, N::ZZRingElem = ZZ(), D::ZZRingElem= ZZ()) + k = nbits(b) + l = 1 + set!(N, b) + set!(D, 2) + +# @assert 0N + @ccall Nemo.libflint.fmpz_root(N::Ref{ZZRingElem}, b::Ref{ZZRingElem}, 2::Int)::Nothing + shift_right!(D, N, 1) + done = true + end + +# @assert 2*N*D < b + + fl = ccall((:_fmpq_reconstruct_fmpz_2, Nemo.libflint), Bool, (Ref{ZZRingElem}, Ref{ZZRingElem}, Ref{ZZRingElem}, Ref{ZZRingElem}, Ref{ZZRingElem}, Ref{ZZRingElem}), n, d, a, b, N, D) + + if fl && (nbits(n)+nbits(d) < k - 30 || D>N) + return fl + end + l += 1 + end + return false +end + +function change_prime!(a::fpMatrix, p::UInt) + @ccall libflint.nmod_mat_set_mod(a::Ref{fpMatrix}, p::UInt)::Nothing +end + +function lift!(A::ZZMatrix, a::fpMatrix) + ccall((:fmpz_mat_set_nmod_mat, Nemo.libflint), Cvoid, (Ref{ZZMatrix}, Ref{fpMatrix}), A, a) +end + +mutable struct DixonCtx + Ainv::fpMatrix + p::UInt + crt_primes::Vector{UInt} + A_mod::Vector{fpMatrix} + d_mod::fpMatrix + y_mod::fpMatrix + Ay_mod::fpMatrix + Ay::ZZMatrix + x::ZZMatrix + d::ZZMatrix + A::ZZMatrix + function DixonCtx() + return new() + end +end + +#copied from flint to allow the use of adaptive reconstruction, +function dixon_init(A::ZZMatrix, B::ZZMatrix; side::Symbol = :right) + return dixon_init!(DixonCtx(), A, B; side) +end + +function dixon_init!(D::DixonCtx, A::ZZMatrix, B::ZZMatrix; side::Symbol = :right) + + if side == :right + @assert nrows(B) == ncols(A) + else + @assert ncols(B) == nrows(A) + end + + if isdefined(D, :A) + @assert size(A) == size(D.A) + end + + if isdefined(D, :x) + @assert size(D.x) == size(B) + end + + if side == :right + if isdefined(D, :A) + copy!(D.A, A) + else + D.A = A + end + ncB = ncols(B) + else + if isdefined(D, :A) + transpose!(D.A, A) + else + D.A = transpose(A) + end + ncB = nrows(B) + end + + n = nrows(D.A) + + p = next_prime(2^59, false) + while true + if isdefined(D, :Ainv) + map_entries!(Native.GF(p, cached = false, check = false), D.Ainv, D.A) + else + D.Ainv = map_entries(Native.GF(p, cached = false, check = false), D.A) + end + try + D.Ainv = inv(D.Ainv) + break + catch e + if e != ErrorException || e.msg != "Matrix not invertible" + rethrow(e) + end + end + p = next_prime(p, false) + end + @assert p != 0 + D.p = p + + if !isdefined(D, :crt_primes) + D.crt_primes = UInt[] + D.A_mod = fpMatrix[] + end + + k = Nemo.fpField(UInt(p), false) + if !isdefined(D, :d_mod) + D.d_mod = zero_matrix(k, n, ncB) + D.y_mod = zero_matrix(k, n, ncB) + D.Ay_mod = zero_matrix(k, n, ncB) + D.Ay = zero_matrix(ZZ, n, ncB) + D.x = zero_matrix(ZZ, n, ncB) + end + + if ncB == 1 + return D + end + + pr = ZZRingElem(1) + xp = next_prime(p, false) + mA = maximum(abs, A) + maxA = mA *(p-1)*n*2 + + while true + push!(D.crt_primes, xp) + push!(D.A_mod, map_entries(Nemo.fpField(UInt(xp), false), A)) + mul!(pr, pr, xp) + if pr > maxA + break + end + xp = next_prime(xp, false) + end + + return D +end + +function dixon_solve(D::DixonCtx, B::ZZMatrix; side::Symbol = :right, block::Int = 10) + if side == :right + d = deepcopy(B) + _B = B + else + d = transpose(B) + _B = transpose(B) + end + @assert ncols(D.A) == nrows(d) + @assert size(D.x) == size(d) + #we're solveing Ax=B, always. if side = :left, then the interface transposes + zero!(D.x) + + mA = maximum(abs, D.A) + mB = maximum(abs, B) + n = nrows(D.A) + #bound for the solution is hadarmat/ cramer + bound = max((n-1)*mA^2+mB^2, n*mA^2)^n * 2^30 + + ppow = ZZRingElem(1) + i = 1 + nexti = 1 + while ppow <= bound + map_entries!(Nemo.fpField(D.p, false), D.d_mod, d) + + Nemo.mul!(D.y_mod, D.Ainv, D.d_mod) + ccall((:fmpz_mat_scalar_addmul_nmod_mat_fmpz, Nemo.libflint), Cvoid, (Ref{ZZMatrix}, Ref{fpMatrix}, Ref{ZZRingElem}), D.x, D.y_mod, ppow) + + Nemo.mul!(ppow, ppow, D.p) + + if ppow > bound + break + end + + stabilised = i == nexti; + i += 1 + + if stabilised + nexti = ceil(Int,(i*1.4)) + 1; + #TODO: maybe col by col? to stop doing cols that are already there? + #main use currently is 1 col anyway + fl, num, den = induce_rational_reconstruction(D.x, ppow) + + if fl +# @show fl = (D.A*num == den*_B) + sz = max(maximum(nbits, D.A) + maximum(nbits, num) + + nbits(ncols(_B)) + 1, + maximum(nbits, _B) + nbits(den)) + + if sz < nbits(ppow) + #we know the product is correct as the p-adic precision + #was high enough already... +# @show "no prod neccessary" + else + xp = next_prime(D.p) +# @show ceil(Int, (sz - nbits(ppow))/60) + local Ap::fpMatrix + local Bp::fpMatrix + local nump::fpMatrix + local lhs::fpMatrix + local rhs::fpMatrix + for i=1:ceil(Int, (sz - nbits(ppow))/60) + k = Nemo.fpField(xp, false) + if i == 1 + Ap = map_entries(k, D.A) + Bp = map_entries(k, _B) + nump = map_entries(k, num) + lhs = Ap*nump + rhs = Bp*k(den) + else + map_entries!(Ap, k, D.A) + map_entries!(Bp, k, _B) + map_entries!(nump, k, num) + mul!(lhs, Ap, nump) + mul!(rhs, Bp, k(den)) + end + fl = lhs == rhs + fl || break + xp = next_prime(xp) + end + end + + if side == :right + fl && return num, den + else + fl && return transpose(num), den + end + end + end + + if ncols(_B) == 1 + n = nrows(D.A) + for i=1:n + Ay_ptr = mat_entry_ptr(D.Ay, i, 1) + A_ptr = mat_entry_ptr(D.A, i, 1) + d_ptr = mat_entry_ptr(d, i, 1) + zero!(Ay_ptr) + for j=1:n + y_ptr = Nemo.mat_entry_ptr(D.y_mod, j, 1) + addmul!(Ay_ptr, A_ptr, unsafe_load(y_ptr)) + A_ptr += sizeof(ZZRingElem) + end + sub!(d_ptr, d_ptr, Ay_ptr) + end + else + C = CrtCtx_Mat(block) + for j=1:length(D.crt_primes) + change_prime!(D.y_mod, D.crt_primes[j]) + change_prime!(D.Ay_mod, D.crt_primes[j]) + + mul!(D.Ay_mod, D.A_mod[j], D.y_mod) + push!(C, D.Ay_mod) + end + change_prime!(D.y_mod, D.p) + finish(C) + Nemo.sub!(d, d, finish(C)) + end + divexact!(d, d, ZZ(D.p)) + end + fl, num, den = induce_rational_reconstruction(D.x, ppow) + @assert fl + + if side == :right + return num, den + else + return transpose(num), den + end +end + +function dixon_solve(A::ZZMatrix, B::ZZMatrix; side::Symbol = :right) + d = dixon_init(A, B; side) + return dixon_solve(d, B; side) +end + +#iterative CRT for matrices +#will use flint (induce_crt!) for blocks of ?? matrices and then switch +#to some tree-based asymptotically fast version. + +function induce_crt!(A::ZZMatrix, p::ZZRingElem, B::fpMatrix; signed::Bool = false) + #the second modulus is implicit in B: B.n + + ccall((:fmpz_mat_CRT_ui, Nemo.libflint), Cvoid, (Ref{ZZMatrix}, Ref{ZZMatrix}, Ref{ZZRingElem}, Ref{fpMatrix}, Int), A, A, p, B, signed) + Nemo.mul!(p, p, B.n) + return +end + +mutable struct CrtCtx_Mat + c::Int #how many primes before tree + + d::Vector{ZZRingElem} #the product tree so far + M::Vector{ZZMatrix} + + pos::Int + cc::Int + + function CrtCtx_Mat(c::Int = 1) + return new(c, ZZRingElem[], ZZMatrix[], 0, 0) + end +end + +function Base.push!(C::CrtCtx_Mat, a::fpMatrix) + if C.pos == 0 + push!(C.M, lift(a)) + push!(C.d, ZZ(a.n)) + C.cc = 1 + C.pos += 1 + return + end + pos = C.pos + if C.cc == 0 + if pos > length(C.M) + push!(C.M, lift(a)) + push!(C.d, ZZ(a.n)) + else + C.M[pos] = lift(a) + C.d[pos] = ZZ(a.n) + end + C.cc = 1 + else + induce_crt!(C.M[C.pos], C.d[C.pos], a; signed = true) + C.cc += 1 + end + if C.cc >= C.c #full... + while pos > 1 && C.d[pos-1] < C.d[pos] #assuming primes are in order + g, e, f = gcdx(C.d[pos-1], C.d[pos]) + @assert isone(g) + mul!(C.M[pos-1], f*C.d[pos]) + mul!(C.M[pos], e*C.d[pos-1]) + add!(C.M[pos-1], C.M[pos-1], C.M[pos]) + C.d[pos-1] *= C.d[pos] + #@show maximum(nbits, C.M[pos-1]) + mod_sym!(C.M[pos-1], C.d[pos-1]) + pos -= 1 + end + C.pos = pos + 1 + C.cc = 0 + end +end + +function finish(C::CrtCtx_Mat) + pos = C.pos + if C.cc == 0 + pos -= 1 + end + while pos > 1 + g, e, f = gcdx(C.d[pos-1], C.d[pos]) + @assert isone(g) + mul!(C.M[pos-1], f*C.d[pos]) + mul!(C.M[pos], e*C.d[pos-1]) + add!(C.M[pos-1], C.M[pos-1], C.M[pos]) + C.d[pos-1] *= C.d[pos] +# @show maximum(nbits, C.M[pos-1]), nbits(C.d[pos-1]) + mod_sym!(C.M[pos-1], C.d[pos-1]) + pos -= 1 + end + return C.M[1] +end + + +#################################################################### +# DoublePlusOne Solver +#################################################################### + +#U represents a 1 x n ZZ matrix in base m +# each row is a single (symmetric) p-adic digit +# (or should be). At this point the entries can be too large, the +# carries will have to be propagated down +# in the algorithm +# - the top is never changed, we we will not look at it (again) +# - the matrix is larger than it seems: U.r is less than actually allocated +# so it will be increased... +# Handle with care. +#_to_base below will actually convert U, inplace, to the correct ZZMatrix +function _renorm(U::ZZMatrix, m::ZZRingElem; start::Int = 1, last::Int = nrows(U), R::ZZMatrix = zero_matrix(ZZ, 1, ncols(U))) + + i = start + t = ZZRingElem(Val(:raw)) +# R = zero_matrix(ZZ, 1, ncols(U)) + zero!(R) + while true + R_ptr = Nemo.mat_entry_ptr(R, 1, 1) + U_ptr = Nemo.mat_entry_ptr(U, i, 1) + + for j=1:ncols(U) + add!(R_ptr, R_ptr, U_ptr) + mod_sym!(U_ptr, R_ptr, m, t) + sub!(R_ptr, R_ptr, U_ptr) + divexact!(R_ptr, R_ptr, m) + R_ptr += sizeof(Clong) + U_ptr += sizeof(Clong) + end + i += 1 + if i > nrows(U) + if i > last || is_zero(R) + Nemo._fmpz_clear_fn(t) + return U + end + while !is_zero(R) + if i > last + Nemo._fmpz_clear_fn(t) + return U + end + U.r += 1 + @assert U.r <= last + U_ptr = Nemo.mat_entry_ptr(U, i, 1) + R_ptr = Nemo.mat_entry_ptr(R, 1, 1) + + for j=1:ncols(U) + mod_sym!(U_ptr, R_ptr, m, t) + sub!(R_ptr, R_ptr, U_ptr) + divexact!(R_ptr, R_ptr, m) + R_ptr += sizeof(Clong) + U_ptr += sizeof(Clong) + end + i += 1 + end + return U + end + end +end + + +function mod_sym!(a::Ptr{ZZRingElem}, b::Ptr{ZZRingElem}, c::ZZRingElem, t::ZZRingElem = ZZ(0)) + ccall((:fmpz_ndiv_qr, Nemo.libflint), Cvoid, (Ref{ZZRingElem}, Ptr{ZZRingElem}, Ptr{ZZRingElem}, Ref{ZZRingElem}), t, a, b, c) +end + +#add C into A[c:end, :] +#add!(view(A, c+1:end, :), C) (but without the view) +function add_into!(A::ZZMatrix, C::ZZMatrix, c::Int) + A.r = max(A.r, C.r+c) + for i=1:nrows(C) + A_ptr = Nemo.mat_entry_ptr(A, i+c, 1) + C_ptr = Nemo.mat_entry_ptr(C, i, 1) + for j=1:ncols(A) + add!(A_ptr, C_ptr) + A_ptr += sizeof(Clong) + C_ptr += sizeof(Clong) + end + end +end + +#sub!(view(A, c+1:end, :), C) (but without the view) +function sub_into!(A::ZZMatrix, C::ZZMatrix, c::Int) + A.r = max(A.r, C.r+c) + for i=1:nrows(C) + A_ptr = Nemo.mat_entry_ptr(A, i+c, 1) + C_ptr = Nemo.mat_entry_ptr(C, i, 1) + for j=1:ncols(A) + sub!(A_ptr, C_ptr) + A_ptr += sizeof(Clong) + C_ptr += sizeof(Clong) + end + end +end + +#saves essentially the allocation of A.rows +function view!(A::ZZMatrix, B::ZZMatrix, r::UnitRange{Int}, ::Colon) + A.r = length(r) + A.c = B.c + A.entries = B.entries + for i=1:A.r + unsafe_store!(A.rows, unsafe_load(B.rows, r.start+i-1), i)# + 8*(c.start -1)) + end + A.view_parent = (B, ) + return A +end + +# "solves" xA = U using the double + 1 method +#Pauderis and Storjohann, same as uni-cert. +# the functions returns +# - a matrix X in ZZ and a denominator d +# +#TODO: +# - _renorm using preinv (to speed things up) +# - if |U| >> |A| use the Abbott trick: write U in base ? and solve multiple +# systems - otherwise the X below needs to be too large and/ or the +# _renorm has to work too much +# - a special version for odd determinant where X is a power of 2 or better +# 2^64 to use limbs (thus making modm _renorm and _to_base trivial) +# (and ugly to implement) +# + +function UniCertSolve(A::ZZMatrix, U::ZZMatrix) + n = nrows(A); + @assert ncols(A) == ncols(U) == n + @assert maximum(nbits, A) >= maximum(nbits, U) + + GC_d = GC.enable(false) #within this function essentially + #nothing can be collected, all(?) operations are julia inplace + #and use a lot of memory (fir large examples) + + m, B0, k = Nemo._PSH_init(A) + if GC_d + GC.enable(true) + GC.gc() + GC.enable(false) + end + + @vprintln(:UnimodVerif,1, "Solve with DoublePlusOne: Max num iters is k=$(k)") + + local pr::Int + pr = 2^(k+2)-2 + allV = ZZMatrix[] + for i=1:nrows(U) + V = zero_matrix(ZZ, pr+10, ncols(U)) + V.r = 1 + push!(allV, V) + end +# @assert maximum(abs, U) <= m + + R = A*B0 + sub!(R, R, 1) + divexact!(R, m) + #R = (I-A*B0)/m + + tmp_V = zero_matrix(ZZ, max(pr + 10, n), ncols(U)) + RE_tmp = zero_matrix(ZZ, 1, ncols(U)) + view_V = view(tmp_V, :, :) + view_U = view(U, :, :) + view_tmp_V = view(tmp_V, :, :) + + for i=1:nrows(U) + V = allV[i] + mul!(view!(view_V, V, 1:1, :), view!(view_U, U, i:i, :), B0) + V.r = 1 + _renorm(V, m; last = 10, R = RE_tmp) + + tV = view!(view_tmp_V, tmp_V, 1:nrows(V), :) + mul!(tV, V, R) + sub_into!(V, tV, 1) #sol mod m^2 + _renorm(V, m; last = 10, R = RE_tmp) + end + + if is_zero(R) + mu = vcat([_to_base!(t, m) for t = allV]...) + tau = induce_rational_reconstruction(mu, m) + @assert tau[1] + GC.enable(GC_d) + return tau[2], tau[3] + end + ex = 1 + + R_bar = zero_matrix(ZZ, n, n) + M = zero_matrix(ZZ, n, n) + + for i in 1:k + @vprintln(:UnimodVerif, 1, "DoublePlusOne: loop: i=$(i)") + #@time R_bar = R^2; + mul!(R_bar, R, R) + vT = view!(view_tmp_V, tmp_V, 1:n, :) + mod_sym!(vT, R_bar, m) + mul!(M, B0, vT) + mod_sym!(M, m) + mul!(R, A, M) + sub!(R, R_bar, R) + divexact!(R, m) + + for j=1:nrows(U) + V =allV[j] + + vT = view!(view_tmp_V, tmp_V, 1:1, :) + mul!(vT, view!(view_U, U, j:j, :), M) + add_into!(V, vT, 2*ex) #sol mod 2ex+1 + _renorm(V, m, start = 2*ex, last = pr, R = RE_tmp) + end + + ex = 2*ex + 1 + + for jj = 1:nrows(U) + V = allV[jj] + + if nrows(V) > nrows(R) #this part should be automatic... + n = nrows(R) + l = nrows(V) + e = div(l, n) + vT = view(tmp_V, 1:l-e*n, :) + mul!(vT, view(V, e*n+1:l, :), R) + add_into!(V, vT, ex+e*n) + vT = view(tmp_V, 1:n, :) + for j=e:-1:1 + @assert j*n <= pr + mul!(vT, view(V, (j-1)*n+1:j*n, :), R) + add_into!(V, vT, ex+(j-1)*n) + end + else + vT = view!(view_tmp_V, tmp_V, 1:nrows(V), :) + mul!(vT, V, R) + add_into!(V, vT, ex) #sol mod 2(2ex+1) + end + _renorm(V, m, start = ex-1, last = pr, R = RE_tmp) + @assert V === allV[jj] + end + + mex = m^(2*ex) + mu = vcat([_to_base!(t[:, 1:1], m) for t = allV]...) + tau = induce_rational_reconstruction(mu, mex) + if tau[1] + GC.enable(true) + mu = vcat([_to_base!(deepcopy(t), m) for t = allV]...) + tau = induce_rational_reconstruction(mu, mex) + if tau[1] + GC.enable(GC_d) + return mu, m + end + GC.gc() + GC.enable(false) + end + end + mu = vcat([_to_base!(t, m) for t = allV]...) + tau = induce_rational_reconstruction(mu, mex) + @assert tau[1] + GC.enable(GC_d) + return tau[2], tau[3] +end + +#inplace conversion +#the rows of a are encoding a base-b row vector +#this computes asymptotically fast(?) the ZZ-version +function _to_base!(a::ZZMatrix, _b::ZZRingElem) + nr = nrows(a) + bb = ZZRingElem(Val(:raw)) + b = ZZRingElem(Val(:raw)) + set!(b, _b) + while nr > 1 + mul!(bb, b, b) + for i=1:div(nr, 2) + add_row!(a, b, 2*i-1, 2*i) + swap_rows!(a, i, 2*i-1) + zero_row!(a, 2*i) #to release memory early + end + if is_odd(nr) + add_row!(a, bb, div(nr, 2), nr) + zero_row!(a, nr) + end + nr = div(nr, 2) + b.d, bb.d = bb.d, b.d + end + Nemo._fmpz_clear_fn(bb) + Nemo._fmpz_clear_fn(b) + #all rows, but the 1st have been set to zero! - or have never been + #used. So no memory is lost... + a.r = 1 + return a +end + diff --git a/src/flint/fmpz.jl b/src/flint/fmpz.jl index 2c359921a..a6395bad6 100644 --- a/src/flint/fmpz.jl +++ b/src/flint/fmpz.jl @@ -2618,6 +2618,14 @@ end # +function tdiv_q!(a::ZZRingElem, b::ZZRingElem, c::ZZRingElem) + ccall((:fmpz_tdiv_q, Nemo.libflint), Cvoid, (Ref{ZZRingElem}, Ref{ZZRingElem}, Ref{ZZRingElem}), a, b, c) +end + +function shift_right!(a::ZZRingElem, b::ZZRingElem, i::Int) + @ccall Nemo.libflint.fmpz_fdiv_q_2exp(a::Ref{ZZRingElem}, b::Ref{ZZRingElem}, i::Int)::Nothing +end + function pow!(z::ZZRingElemOrPtr, a::ZZRingElemOrPtr, b::Integer) @ccall libflint.fmpz_pow_ui(z::Ref{ZZRingElem}, a::Ref{ZZRingElem}, UInt(b)::UInt)::Nothing return z @@ -3225,3 +3233,5 @@ function resultant(f::ZZPolyRingElem, g::ZZPolyRingElem, d::ZZRingElem, nb::Int) @ccall libflint.fmpz_poly_resultant_modular_div(z::Ref{ZZRingElem}, f::Ref{ZZPolyRingElem}, g::Ref{ZZPolyRingElem}, d::Ref{ZZRingElem}, nb::Int)::Nothing return z end + + diff --git a/src/flint/fmpz_mat.jl b/src/flint/fmpz_mat.jl index 0b64238af..9db2a3495 100644 --- a/src/flint/fmpz_mat.jl +++ b/src/flint/fmpz_mat.jl @@ -1803,11 +1803,24 @@ end # ############################################################################### +function Base.copy!(A::ZZMatrix, B::ZZMatrix) + ccall((:fmpz_mat_set, Nemo.libflint), Cvoid, (Ref{ZZMatrix}, Ref{ZZMatrix}), A, B) +end + function zero!(z::ZZMatrixOrPtr) @ccall libflint.fmpz_mat_zero(z::Ref{ZZMatrix})::Nothing return z end +function zero_row!(z::ZZMatrix, i::Int) + z_ptr = mat_entry_ptr(z, i, 1) + for i=1:ncols(z) + zero!(z_ptr) + z_ptr += sizeof(Int) + end + return z +end + function one!(z::ZZMatrixOrPtr) @ccall libflint.fmpz_mat_one(z::Ref{ZZMatrix})::Nothing return z @@ -1828,6 +1841,15 @@ function sub!(z::ZZMatrixOrPtr, x::ZZMatrixOrPtr, y::ZZMatrixOrPtr) return z end +function sub!(A::ZZMatrix, B::ZZMatrix, m::Int) + for i=1:nrows(A) + A_p = Nemo.mat_entry_ptr(A, i, i) + B_p = Nemo.mat_entry_ptr(B, i, i) + sub!(A_p, B_p, m) + end + return A +end + function mul!(z::ZZMatrixOrPtr, x::ZZMatrixOrPtr, y::ZZMatrixOrPtr) @ccall libflint.fmpz_mat_mul(z::Ref{ZZMatrix}, x::Ref{ZZMatrix}, y::Ref{ZZMatrix})::Nothing return z @@ -2151,3 +2173,4 @@ end ################################################################################ mat_entry_ptr(A::ZZMatrix, i::Int, j::Int) = unsafe_load(A.rows, i) + (j-1)*sizeof(ZZRingElem) + diff --git a/src/matrix.jl b/src/matrix.jl index e39d832be..ef06b1c81 100644 --- a/src/matrix.jl +++ b/src/matrix.jl @@ -395,298 +395,3 @@ function reduce_mod(A::MatElem{T}, B::MatElem{T}) where {T<:FieldElem} return C end - -################################################################## -## Functions related to unimodularity verification -################################################################## - -# ***SOURCE ALGORITHM*** -# Refined implementation of method from -# Authors: C. Pauderis + A. Storjohann -# Title: Detrministic Unimodularity Certification -# Where: Proc ISSAC 2012, pp. 281--287 -# See also: thesis of Colton Pauderis, 2013, Univ of Waterloo -# Deterministic Unimodularity Certification and Applications for Integer Matrices - - -# add_verbosity_scope(:UnimodVerif) -- see func __init__ in src/Nemo.jl - -####################################################### -# Low-level auxiliary functions -- should be elsewhere? -####################################################### - -function _det(a::fpMatrix) - a.r < 10 && return lift(det(a)) - #_det avoids a copy: det is computed destructively - r = ccall((:_nmod_mat_det, Nemo.libflint), UInt, (Ref{fpMatrix}, ), a) - return r -end - -function map_entries!(k::Nemo.fpField, a::fpMatrix, A::ZZMatrix) - ccall((:nmod_mat_set_mod, Nemo.libflint), Cvoid, (Ref{fpMatrix}, UInt), a, k.n) - ccall((:fmpz_mat_get_nmod_mat, Nemo.libflint), Cvoid, (Ref{fpMatrix}, Ref{ZZMatrix}), a, A) - a.base_ring = k # exploiting that the internal repr is the indep of char - return a -end - -function Base.copy!(A::ZZMatrix, B::ZZMatrix) - ccall((:fmpz_mat_set, Nemo.libflint), Cvoid, (Ref{ZZMatrix}, Ref{ZZMatrix}), A, B) -end - -function sub!(A::ZZMatrix, B::ZZMatrix, m::Int) - for i=1:nrows(A) - A_p = Nemo.mat_entry_ptr(A, i, i) - B_p = Nemo.mat_entry_ptr(B, i, i) - sub!(A_p, B_p, m) - end - return A -end - -@doc raw""" - is_unimodular(A::ZZMatrix) - is_unimodular(A::ZZMatrix; algorithm=:auto) - -Determine whether `A` is unimodular, i.e. whether `abs(det(A)) == 1`. -The method used is either that of Pauderis--Storjohann or using CRT; -the choice is made based on cost estimates for the two approaches. - -The kwarg `algorithm` may be specified to be `:CRT` or `:pauderis_storjohann` -to ensure that the corresponding underlying algorithm is used (after a quick -initial unimodularity check). `:CRT` is better if the matrix is unimodular -and has an inverse containing large entries (or if it is not unimodular); -`:pauderis_storjohann` is asymptotically faster when the matrix is unimodular -and its inverse does not contain large entries, but it requires more space -for intermediate expressions. -""" -function is_unimodular(A::ZZMatrix; algorithm=:auto) - # Call this function when no extra info about the matrix is available. - # It does a preliminary check that det(A) is +/-1 modulo roughly 2^100. - # If so, then delegate the complete check to is_unimodular_given_det_mod_m - if !is_square(A) - throw(ArgumentError("Matrix must be square")) - end - if !(algorithm in [:auto, :CRT, :pauderis_storjohann]) - throw(ArgumentError("algorithm must be one of [:CRT, :pauderis_storjohann, :auto]")) - end - # Deal with two trivial cases - if nrows(A) == 0 - return true - end - if nrows(A) == 1 - return (abs(A[1,1]) == 1); - end - # Compute det mod M -- may later include some more primes - target_bitsize_modulus = 100 # magic number -- should not be too small! - @vprintln(:UnimodVerif,1,"Quick first check: compute det by crt up to modulus with about $(target_bitsize_modulus) bits") - p::Int = 2^20 # start point of primes for initial CRT trial -- det with modulus >= 2^22 is much slower - M = ZZ(1) - det_mod_m = 0 # 0 means uninitialized, o/w value is 1 or -1 - A_mod_p = identity_matrix(Nemo.Native.GF(2), nrows(A)) - while nbits(M) <= target_bitsize_modulus - @vprint(:UnimodVerif,2,".") - p = next_prime(p + rand(1:2^10), false) # increment by a random step to a prime - ZZmodP = Nemo.Native.GF(p; cached = false, check = false) - map_entries!(ZZmodP, A_mod_p, A) - @vtime :UnimodVerif 2 det_mod_p = Int(_det(A_mod_p)) - p2 = p>>1 - if det_mod_p > p2 - det_mod_p -= p - end - if abs(det_mod_p) != 1 - return false # obviously not unimodular - end - if det_mod_m != 0 && det_mod_m != det_mod_p - return false # obviously not unimodular - end - det_mod_m = det_mod_p - mul!(M, M, p) - end - return _is_unimodular_given_det_mod_m(A, det_mod_m, M; algorithm) -end #function - - -function _is_unimodular_given_det_mod_m(A::ZZMatrix, det_mod_m::Int, M::ZZRingElem; algorithm=:auto) - # This UI is convenient for our sophisticated determinant algorithm. - # We estimate cost of computing det by CRT and cost of doing Pauderis-Storjohann - # unimodular verification then call whichever is likely to be faster - # (but we avoid P-S if the space estimate is too large). - # M is a modulus satisfying M > 2^30; - # det_mod_m is det(A) mod M [!not checked!]: it is either +1 or -1. - is_square(A) || throw(ArgumentError("Matrix must be square")) - abs(det_mod_m) == 1 || throw(ArgumentError("det_mod_m must be +1 or -1")) - M > 2^30 || throw(ArgumentError("modulus must be greater than 2^30")) - (algorithm in [:auto, :CRT, :pauderis_storjohann]) || throw(ArgumentError("algorithm must be one of [:CRT, :pauderis_storjohann, :auto]")) - # Deal with two trivial cases -- does this make sense here??? - nrows(A) == 0 && return true - nrows(A) == 1 && return (abs(A[1,1]) == 1) - @vprintln(:UnimodVerif,1,"is_unimodular_given_det_mod_m starting") - if algorithm == :pauderis_storjohann - @vprintln(:UnimodVerif,1,"User specified Pauderis_Storjohann --> delegating") - return _is_unimodular_Pauderis_Storjohann_Hensel(A) - end - n = ncols(A) - Hrow = hadamard_bound2(A) - Hcol = hadamard_bound2(transpose(A)) - DetBoundSq = min(Hrow, Hcol) - Hbits = 1+div(nbits(DetBoundSq),2) - @vprintln(:UnimodVerif,1,"Hadamard bound in bits $(Hbits)") - if algorithm == :auto - # Estimate whether better to "climb out" with CRT or use Pauderis-Storjohann - CRT_speed_factor = 20.0 # scale factor is JUST A GUESS !!! (how much faster CRT is compared to P-S) - total_entry_size = sum(nbits,A) - Estimate_CRT = ceil(((Hbits - nbits(M))*(2.5+total_entry_size/n^3))/CRT_speed_factor) # total_entry_size/n is proportional to cost of reducing entries mod p - EntrySize = maximum(abs, A) - entry_size_bits = maximum(nbits, A) - @vprintln(:UnimodVerif,1,"entry_size_bits = $(entry_size_bits) log2(EntrySize) = $(ceil(log2(EntrySize)))") - e = max(16, Int(2+ceil(2*log2(n)+log2(EntrySize)))) # see Condition in Fig 1, p.284 of Pauderis-Storjohann - Estimate_PS = ceil(e*log(e)) # assumes soft-linear ZZ arith - @vprintln(:UnimodVerif,1,"Est_CRT = $(Estimate_CRT) and Est_PS = $(Estimate_PS)") - Space_estimate_PS = ZZ(n)^2 * e # nbits of values, ignoring mem mgt overhead - @vprintln(:UnimodVerif,1,"Space est PS: $(Space_estimate_PS) [might be bytes]") - if Estimate_PS < Estimate_CRT #=&& Space_estimate_PS < 2^32=# - # Expected RAM requirement is not excessive, and - # Pauderis-Storjohann is likely faster, so delegate to UniCertZ_hensel - return _is_unimodular_Pauderis_Storjohann_Hensel(A) - end - end - if algorithm == :CRT - @vprintln(:UnimodVerif,1,"User specified CRT --> delegating") - end - @vprintln(:UnimodVerif,1,"UnimodularVerification: CRT loop") - # Climb out with CRT: - # in CRT loop start with 22 bit primes; if we reach threshold (empirical), jump to much larger primes. - p = 2^21; stride = 2^10; threshold = 5000000; - A_mod_p = zero_matrix(Nemo.Native.GF(2), nrows(A), ncols(A)) - while nbits(M) < Hbits - @vprint(:UnimodVerif,2,".") - # Next lines increment p (by random step up to stride) to a prime not dividing M - if p > threshold && p < threshold+stride - @vprint(:UnimodVerif,2,"!!") - p = 2^56; stride = 2^25; # p = 2^56 is a good choice on my standard 64-bit machine - continue - end - # advance to another prime which does not divide M: - while true - p = next_prime(p+rand(1:stride), false) - if !is_divisible_by(M,p) - break - end - end - ZZmodP = Nemo.Native.GF(p; cached = false, check = false) - map_entries!(ZZmodP, A_mod_p, A) - det_mod_p = _det(A_mod_p) - if det_mod_p > p/2 - det_mod_p -= p - end - if abs(det_mod_p) != 1 || det_mod_m != det_mod_p - return false # obviously not unimodular - end - mul!(M, M, p) - end - return true -end - -#DoublePlusOne complex ############################## - -#TODO: Hensel is slower than CRT if modulus is small(ish) - -function _PSH_init(A::ZZMatrix) - #computes a - # modulus (X) in the paper - # the inverse of A mod x - # the exponent target for the lifting. - - n = nrows(A) - @assert ncols(A) == n - - EntrySize = maximum(abs, A) - entry_size_bits = maximum(nbits, A) - e = max(16, 2+ceil(Int, 2*log2(n)+entry_size_bits)) - ModulusLWB = ZZ(2)^e - @vprintln(:UnimodVerif,1,"ModulusLWB is 2^$(e)") - # Now look for prime: about 25 bits, and such that p^(2^sthg) is - # just slightly bigger than ModulusLWB. Bit-size of prime matters little. - root_order = exp2(ceil(log2(e/25))) - j = ceil(exp2(e/root_order)) - p = next_prime(Int(j)) - @vprintln(:UnimodVerif,1,"Hensel prime is p = $(p)") - m = ZZ(p) - ZZmodP = Nemo.Native.GF(p) - B0 = lift(inv(map_entries(ZZmodP, A))) #produces pos. lift - mod_sym!(B0, m) - delta = identity_matrix(base_ring(A), n) - A_mod_m = identity_matrix(base_ring(A), n) - @vprintln(:UnimodVerif,1,"Starting stage 1 lifting") - while (m < ModulusLWB) - @vprintln(:UnimodVerif,2,"Loop 1: nbits(m) = $(nbits(m))"); - copy!(A_mod_m, A) - mm = m^2 - mod_sym!(A_mod_m, mm) - @vtime :UnimodVerif 2 mul!(delta, A_mod_m, B0) - sub!(delta, delta, 1) - divexact!(delta, m) # to make matrix product in line below cheaper - @vtime :UnimodVerif 2 mul!(A_mod_m, B0, delta) - mul!(A_mod_m, A_mod_m, m) - sub!(B0, B0, A_mod_m) - m = mm - mod_sym!(B0, m) - end - @vprintln(:UnimodVerif,1,"Got stage 1 modular inverse: modulus has $(nbits(m)) bits; value is $(p)^$(Int(round(log2(m)/log2(p))))") - - # Hadamard bound brings little benefit; anyway, almost never lift all the way -### H = min(hadamard_bound2(A), hadamard_bound2(transpose(A))) -### println("nbits(H) = $(nbits(H))"); - # Compute max num iters in k - k = (n-1)*(entry_size_bits+log2(n)/2) - (2*log2(n) + entry_size_bits -1) - k = 2+nbits(ceil(Int,k/log2(m))) - @vprintln(:UnimodVerif,1,"Stage 2: max num iters is k=$(k)") - - return m, B0, k -end - - -# THIS FUNCTION NOT OFFICIALLY DOCUMENTED -- not intended for public use. -# Pauderis-Storjohann unimodular verification/certification. -# We use Hensel lifting to obtain the modular inverse B0 -# Seems to be faster than the CRT prototype (not incl. here) -# VERBOSITY via :UnimodVerif -function _is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix) - n = nrows(A) - m, B0, k = _PSH_init(A) - - ## @assert is_one(lift(MatModM(B0*A))) - # Originally: R = (I-A*B0)/m - R = A*B0 - sub!(R, R, 1) #this is -R, but is_zero test is the same, and the loop starts by squaring - divexact!(R, m) - if is_zero(R) - return true - end - - #ZZMatrix and ZZModMatrix are the same type. The modulus is - #always passed in as an extra argument when needed... - #mul! for ZZModMatrix is mul, followed by reduce. - #thus the code is not using ZZModMatrices at all... - - R_bar = zero_matrix(ZZ, n, n) - M = zero_matrix(ZZ, n, n) - - @vprintln(:UnimodVerif,1,"Starting stage 2 lifting") - for i in 0:k-1 - @vprintln(:UnimodVerif,1,"Stage 2 loop: i=$(i)") - - mul!(R_bar, R, R) - #Next 3 lines do: M = lift(B0_modm*MatModM(R_bar)); - mod_sym!(R_bar, m) - mul!(M, B0, R_bar) - mod_sym!(M, m) - # Next 3 lines do: R = (R_bar - A*M)/m; but with less memory allocation - mul!(R, A, M) - sub!(R, R_bar, R) - divexact!(R, m) - if is_zero(R) - return true - end - end - return false -end diff --git a/test/Nemo-test.jl b/test/Nemo-test.jl index 8e03ff876..bead4a11a 100644 --- a/test/Nemo-test.jl +++ b/test/Nemo-test.jl @@ -8,3 +8,5 @@ include("Infinity-test.jl") include("HeckeMiscLocalization-test.jl") include("matrix-test.jl") include("poly-test.jl") +include("ZZMatrix-linalg-test.jl") + diff --git a/test/ZZMatrix-linalg-test.jl b/test/ZZMatrix-linalg-test.jl new file mode 100644 index 000000000..d8d0a6af4 --- /dev/null +++ b/test/ZZMatrix-linalg-test.jl @@ -0,0 +1,30 @@ +@testset "dixon solver" begin + A = matrix(ZZ, 4, 4, [1, 2, 7, 4, 15, 6, 7, 9, 19, 10, 11, 11, 53, 14, 15, 16]) + b = matrix(ZZ, 4, 1, [-1, -4, 5, 7]) + bb = matrix(ZZ, 4, 2, [-1, -2, -3, 5, 11, 2, 13, 17]) + for i = [1,5,10] + s, d = Nemo.dixon_solve(A^i, b) + @test A^i*s == d*b + s, d = Nemo.dixon_solve(A^i, transpose(b)); side = :left + @test s*A^i == d*transpose(b) + + s, d = Nemo.dixon_solve(A^i, bb) + @test A^i*s == d*bb + s, d = Nemo.dixon_solve(A^i, transpose(bb)); side = :left + @test s*A^i == d*transpose(bb) + end +end + +@testset "DoublePlusOne" begin + A = matrix(ZZ, 4, 4, [1, 2, 7, 4, 15, 6, 7, 9, 19, 10, 11, 11, 53, 14, 15, 16]) + b = matrix(ZZ, 1, 4, [-1, -4, 5, 7]) + bb = matrix(ZZ, 2, 4, [-1, -2, -3, 5, 11, 2, 13, 17]) + for i = [1,5,10] + s, d = Nemo.UniCertSolve(A^i, b) + @test s*A^i == d*b + + s, d = Nemo.UniCertSolve(A^i, bb) + @test s*A^i == d*bb + end +end + From d0470ce439e24a622e0d06bfeaea00ff49691329 Mon Sep 17 00:00:00 2001 From: Claus Fieker Date: Fri, 7 Feb 2025 15:32:07 +0100 Subject: [PATCH 03/11] fix dims --- src/ZZMatrix-linalg.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ZZMatrix-linalg.jl b/src/ZZMatrix-linalg.jl index 3ed7444bf..1003d1130 100644 --- a/src/ZZMatrix-linalg.jl +++ b/src/ZZMatrix-linalg.jl @@ -384,9 +384,9 @@ end function dixon_init!(D::DixonCtx, A::ZZMatrix, B::ZZMatrix; side::Symbol = :right) if side == :right - @assert nrows(B) == ncols(A) + @assert nrows(B) == nrows(A) else - @assert ncols(B) == nrows(A) + @assert ncols(B) == ncols(A) end if isdefined(D, :A) From be7d0dceaef25f0edc98c5cef0afc475453bca14 Mon Sep 17 00:00:00 2001 From: Claus Fieker Date: Fri, 7 Feb 2025 15:58:51 +0100 Subject: [PATCH 04/11] cool typo --- test/ZZMatrix-linalg-test.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/ZZMatrix-linalg-test.jl b/test/ZZMatrix-linalg-test.jl index d8d0a6af4..03e23829e 100644 --- a/test/ZZMatrix-linalg-test.jl +++ b/test/ZZMatrix-linalg-test.jl @@ -5,12 +5,12 @@ for i = [1,5,10] s, d = Nemo.dixon_solve(A^i, b) @test A^i*s == d*b - s, d = Nemo.dixon_solve(A^i, transpose(b)); side = :left + s, d = Nemo.dixon_solve(A^i, transpose(b); side = :left) @test s*A^i == d*transpose(b) s, d = Nemo.dixon_solve(A^i, bb) @test A^i*s == d*bb - s, d = Nemo.dixon_solve(A^i, transpose(bb)); side = :left + s, d = Nemo.dixon_solve(A^i, transpose(bb); side = :left) @test s*A^i == d*transpose(bb) end end From 6bc9c2c8ec708dce53105f99733fd4633ab5beae Mon Sep 17 00:00:00 2001 From: Claus Fieker Date: Fri, 7 Feb 2025 16:18:44 +0100 Subject: [PATCH 05/11] transposing and using it! --- src/ZZMatrix-linalg.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/ZZMatrix-linalg.jl b/src/ZZMatrix-linalg.jl index 1003d1130..2079c7e3b 100644 --- a/src/ZZMatrix-linalg.jl +++ b/src/ZZMatrix-linalg.jl @@ -383,9 +383,11 @@ end function dixon_init!(D::DixonCtx, A::ZZMatrix, B::ZZMatrix; side::Symbol = :right) + sB = size(B) if side == :right @assert nrows(B) == nrows(A) else + sB = (sB[2], sB[1]) @assert ncols(B) == ncols(A) end @@ -394,7 +396,7 @@ function dixon_init!(D::DixonCtx, A::ZZMatrix, B::ZZMatrix; side::Symbol = :righ end if isdefined(D, :x) - @assert size(D.x) == size(B) + @assert size(D.x) == sB end if side == :right @@ -460,7 +462,7 @@ function dixon_init!(D::DixonCtx, A::ZZMatrix, B::ZZMatrix; side::Symbol = :righ while true push!(D.crt_primes, xp) - push!(D.A_mod, map_entries(Nemo.fpField(UInt(xp), false), A)) + push!(D.A_mod, map_entries(Nemo.fpField(UInt(xp), false), D.A)) mul!(pr, pr, xp) if pr > maxA break @@ -590,6 +592,8 @@ function dixon_solve(D::DixonCtx, B::ZZMatrix; side::Symbol = :right, block::Int end divexact!(d, d, ZZ(D.p)) end + @show _t = D.A*D.x - _B + @show map(x-> is_zero(x) ? -1 : valuation(x, D.p), _t) fl, num, den = induce_rational_reconstruction(D.x, ppow) @assert fl From a79df589fd37a12ee6babe743dfec9556bc1f4d6 Mon Sep 17 00:00:00 2001 From: Claus Fieker Date: Fri, 7 Feb 2025 16:28:11 +0100 Subject: [PATCH 06/11] remove printing --- src/ZZMatrix-linalg.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/ZZMatrix-linalg.jl b/src/ZZMatrix-linalg.jl index 2079c7e3b..2e4d78686 100644 --- a/src/ZZMatrix-linalg.jl +++ b/src/ZZMatrix-linalg.jl @@ -592,8 +592,6 @@ function dixon_solve(D::DixonCtx, B::ZZMatrix; side::Symbol = :right, block::Int end divexact!(d, d, ZZ(D.p)) end - @show _t = D.A*D.x - _B - @show map(x-> is_zero(x) ? -1 : valuation(x, D.p), _t) fl, num, den = induce_rational_reconstruction(D.x, ppow) @assert fl From fa42363bbcae705ee923cfb19937aa9593ef6e5c Mon Sep 17 00:00:00 2001 From: Claus Fieker Date: Fri, 7 Feb 2025 16:55:58 +0100 Subject: [PATCH 07/11] remove debugging --- src/ZZMatrix-linalg.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ZZMatrix-linalg.jl b/src/ZZMatrix-linalg.jl index 2e4d78686..1fb676d25 100644 --- a/src/ZZMatrix-linalg.jl +++ b/src/ZZMatrix-linalg.jl @@ -935,7 +935,7 @@ function UniCertSolve(A::ZZMatrix, U::ZZMatrix) tau = induce_rational_reconstruction(mu, mex) if tau[1] GC.enable(GC_d) - return mu, m + return tau[2], tau[3] end GC.gc() GC.enable(false) From fe9d5f4ac19c75be55069ad562d68887412a9d34 Mon Sep 17 00:00:00 2001 From: Claus Fieker Date: Mon, 10 Feb 2025 09:20:39 +0100 Subject: [PATCH 08/11] make windows happy --- src/ZZMatrix-linalg.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/ZZMatrix-linalg.jl b/src/ZZMatrix-linalg.jl index 1fb676d25..8bab8ba3a 100644 --- a/src/ZZMatrix-linalg.jl +++ b/src/ZZMatrix-linalg.jl @@ -721,8 +721,8 @@ function _renorm(U::ZZMatrix, m::ZZRingElem; start::Int = 1, last::Int = nrows(U mod_sym!(U_ptr, R_ptr, m, t) sub!(R_ptr, R_ptr, U_ptr) divexact!(R_ptr, R_ptr, m) - R_ptr += sizeof(Clong) - U_ptr += sizeof(Clong) + R_ptr += sizeof(Int) + U_ptr += sizeof(Int) end i += 1 if i > nrows(U) @@ -744,8 +744,8 @@ function _renorm(U::ZZMatrix, m::ZZRingElem; start::Int = 1, last::Int = nrows(U mod_sym!(U_ptr, R_ptr, m, t) sub!(R_ptr, R_ptr, U_ptr) divexact!(R_ptr, R_ptr, m) - R_ptr += sizeof(Clong) - U_ptr += sizeof(Clong) + R_ptr += sizeof(Int) + U_ptr += sizeof(Int) end i += 1 end @@ -768,8 +768,8 @@ function add_into!(A::ZZMatrix, C::ZZMatrix, c::Int) C_ptr = Nemo.mat_entry_ptr(C, i, 1) for j=1:ncols(A) add!(A_ptr, C_ptr) - A_ptr += sizeof(Clong) - C_ptr += sizeof(Clong) + A_ptr += sizeof(Int) + C_ptr += sizeof(Int) end end end @@ -782,8 +782,8 @@ function sub_into!(A::ZZMatrix, C::ZZMatrix, c::Int) C_ptr = Nemo.mat_entry_ptr(C, i, 1) for j=1:ncols(A) sub!(A_ptr, C_ptr) - A_ptr += sizeof(Clong) - C_ptr += sizeof(Clong) + A_ptr += sizeof(Int) + C_ptr += sizeof(Int) end end end From 89e2a3a2d8443c08782ab8cbe7f2ed62d1494ac5 Mon Sep 17 00:00:00 2001 From: Claus Fieker Date: Mon, 10 Feb 2025 09:57:39 +0100 Subject: [PATCH 09/11] add a few @preserve --- src/ZZMatrix-linalg.jl | 154 ++++++++++++++++++++++------------------- src/flint/fmpz_mat.jl | 20 +++--- 2 files changed, 94 insertions(+), 80 deletions(-) diff --git a/src/ZZMatrix-linalg.jl b/src/ZZMatrix-linalg.jl index 8bab8ba3a..32c6da90b 100644 --- a/src/ZZMatrix-linalg.jl +++ b/src/ZZMatrix-linalg.jl @@ -291,23 +291,25 @@ function induce_rational_reconstruction(a::ZZMatrix, b::ZZRingElem) bN = ZZRingElem(Val(:raw)) bD = ZZRingElem(Val(:raw)) D = ZZRingElem(1) - for i=1:nrows(a) - a_ptr = Nemo.mat_entry_ptr(a, i, 1) - A_ptr = Nemo.mat_entry_ptr(A, i, 1) - for j=1:ncols(a) - Nemo.set!(T, a_ptr) - Nemo.mul!(T, T, D) - Nemo.mod!(T, T, b) - fl = ratrec!(n, d, T, b, bN, bD) - fl || return fl, A, D - if !isone(d) - mul!(D, D, d) - Nemo.mul!(A, A, d) - end - Nemo.set!(A_ptr, n) + GC.@preserve a A begin + for i=1:nrows(a) + a_ptr = Nemo.mat_entry_ptr(a, i, 1) + A_ptr = Nemo.mat_entry_ptr(A, i, 1) + for j=1:ncols(a) + Nemo.set!(T, a_ptr) + Nemo.mul!(T, T, D) + Nemo.mod!(T, T, b) + fl = ratrec!(n, d, T, b, bN, bD) + fl || return fl, A, D + if !isone(d) + mul!(D, D, d) + Nemo.mul!(A, A, d) + end + Nemo.set!(A_ptr, n) - a_ptr += sizeof(ZZRingElem) - A_ptr += sizeof(ZZRingElem) + a_ptr += sizeof(ZZRingElem) + A_ptr += sizeof(ZZRingElem) + end end end Nemo._fmpz_clear_fn(bN) @@ -565,17 +567,19 @@ function dixon_solve(D::DixonCtx, B::ZZMatrix; side::Symbol = :right, block::Int if ncols(_B) == 1 n = nrows(D.A) - for i=1:n - Ay_ptr = mat_entry_ptr(D.Ay, i, 1) - A_ptr = mat_entry_ptr(D.A, i, 1) - d_ptr = mat_entry_ptr(d, i, 1) - zero!(Ay_ptr) - for j=1:n - y_ptr = Nemo.mat_entry_ptr(D.y_mod, j, 1) - addmul!(Ay_ptr, A_ptr, unsafe_load(y_ptr)) - A_ptr += sizeof(ZZRingElem) + GC.@preserve D d begin + for i=1:n + Ay_ptr = mat_entry_ptr(D.Ay, i, 1) + A_ptr = mat_entry_ptr(D.A, i, 1) + d_ptr = mat_entry_ptr(d, i, 1) + zero!(Ay_ptr) + for j=1:n + y_ptr = Nemo.mat_entry_ptr(D.y_mod, j, 1) + addmul!(Ay_ptr, A_ptr, unsafe_load(y_ptr)) + A_ptr += sizeof(ZZRingElem) + end + sub!(d_ptr, d_ptr, Ay_ptr) end - sub!(d_ptr, d_ptr, Ay_ptr) end else C = CrtCtx_Mat(block) @@ -712,44 +716,46 @@ function _renorm(U::ZZMatrix, m::ZZRingElem; start::Int = 1, last::Int = nrows(U t = ZZRingElem(Val(:raw)) # R = zero_matrix(ZZ, 1, ncols(U)) zero!(R) - while true - R_ptr = Nemo.mat_entry_ptr(R, 1, 1) - U_ptr = Nemo.mat_entry_ptr(U, i, 1) - - for j=1:ncols(U) - add!(R_ptr, R_ptr, U_ptr) - mod_sym!(U_ptr, R_ptr, m, t) - sub!(R_ptr, R_ptr, U_ptr) - divexact!(R_ptr, R_ptr, m) - R_ptr += sizeof(Int) - U_ptr += sizeof(Int) - end - i += 1 - if i > nrows(U) - if i > last || is_zero(R) - Nemo._fmpz_clear_fn(t) - return U + GC.@preserve R U begin + while true + R_ptr = Nemo.mat_entry_ptr(R, 1, 1) + U_ptr = Nemo.mat_entry_ptr(U, i, 1) + + for j=1:ncols(U) + add!(R_ptr, R_ptr, U_ptr) + mod_sym!(U_ptr, R_ptr, m, t) + sub!(R_ptr, R_ptr, U_ptr) + divexact!(R_ptr, R_ptr, m) + R_ptr += sizeof(Int) + U_ptr += sizeof(Int) end - while !is_zero(R) - if i > last + i += 1 + if i > nrows(U) + if i > last || is_zero(R) Nemo._fmpz_clear_fn(t) return U end - U.r += 1 - @assert U.r <= last - U_ptr = Nemo.mat_entry_ptr(U, i, 1) - R_ptr = Nemo.mat_entry_ptr(R, 1, 1) - - for j=1:ncols(U) - mod_sym!(U_ptr, R_ptr, m, t) - sub!(R_ptr, R_ptr, U_ptr) - divexact!(R_ptr, R_ptr, m) - R_ptr += sizeof(Int) - U_ptr += sizeof(Int) + while !is_zero(R) + if i > last + Nemo._fmpz_clear_fn(t) + return U + end + U.r += 1 + @assert U.r <= last + U_ptr = Nemo.mat_entry_ptr(U, i, 1) + R_ptr = Nemo.mat_entry_ptr(R, 1, 1) + + for j=1:ncols(U) + mod_sym!(U_ptr, R_ptr, m, t) + sub!(R_ptr, R_ptr, U_ptr) + divexact!(R_ptr, R_ptr, m) + R_ptr += sizeof(Int) + U_ptr += sizeof(Int) + end + i += 1 end - i += 1 + return U end - return U end end end @@ -763,13 +769,15 @@ end #add!(view(A, c+1:end, :), C) (but without the view) function add_into!(A::ZZMatrix, C::ZZMatrix, c::Int) A.r = max(A.r, C.r+c) - for i=1:nrows(C) - A_ptr = Nemo.mat_entry_ptr(A, i+c, 1) - C_ptr = Nemo.mat_entry_ptr(C, i, 1) - for j=1:ncols(A) - add!(A_ptr, C_ptr) - A_ptr += sizeof(Int) - C_ptr += sizeof(Int) + GC.@preserve A C begin + for i=1:nrows(C) + A_ptr = Nemo.mat_entry_ptr(A, i+c, 1) + C_ptr = Nemo.mat_entry_ptr(C, i, 1) + for j=1:ncols(A) + add!(A_ptr, C_ptr) + A_ptr += sizeof(Int) + C_ptr += sizeof(Int) + end end end end @@ -777,13 +785,15 @@ end #sub!(view(A, c+1:end, :), C) (but without the view) function sub_into!(A::ZZMatrix, C::ZZMatrix, c::Int) A.r = max(A.r, C.r+c) - for i=1:nrows(C) - A_ptr = Nemo.mat_entry_ptr(A, i+c, 1) - C_ptr = Nemo.mat_entry_ptr(C, i, 1) - for j=1:ncols(A) - sub!(A_ptr, C_ptr) - A_ptr += sizeof(Int) - C_ptr += sizeof(Int) + GC.@preserve A C begin + for i=1:nrows(C) + A_ptr = Nemo.mat_entry_ptr(A, i+c, 1) + C_ptr = Nemo.mat_entry_ptr(C, i, 1) + for j=1:ncols(A) + sub!(A_ptr, C_ptr) + A_ptr += sizeof(Int) + C_ptr += sizeof(Int) + end end end end diff --git a/src/flint/fmpz_mat.jl b/src/flint/fmpz_mat.jl index 9db2a3495..369266aa9 100644 --- a/src/flint/fmpz_mat.jl +++ b/src/flint/fmpz_mat.jl @@ -1813,10 +1813,12 @@ function zero!(z::ZZMatrixOrPtr) end function zero_row!(z::ZZMatrix, i::Int) - z_ptr = mat_entry_ptr(z, i, 1) - for i=1:ncols(z) - zero!(z_ptr) - z_ptr += sizeof(Int) + GC.@preserve z begin + z_ptr = mat_entry_ptr(z, i, 1) + for i=1:ncols(z) + zero!(z_ptr) + z_ptr += sizeof(Int) + end end return z end @@ -1842,10 +1844,12 @@ function sub!(z::ZZMatrixOrPtr, x::ZZMatrixOrPtr, y::ZZMatrixOrPtr) end function sub!(A::ZZMatrix, B::ZZMatrix, m::Int) - for i=1:nrows(A) - A_p = Nemo.mat_entry_ptr(A, i, i) - B_p = Nemo.mat_entry_ptr(B, i, i) - sub!(A_p, B_p, m) + GC.@preserve A begin + for i=1:nrows(A) + A_p = Nemo.mat_entry_ptr(A, i, i) + B_p = Nemo.mat_entry_ptr(B, i, i) + sub!(A_p, B_p, m) + end end return A end From f53ded6ad14b73875eefab780ff2f62ca0fe04a0 Mon Sep 17 00:00:00 2001 From: Tommy Hofmann Date: Mon, 10 Feb 2025 12:11:42 +0100 Subject: [PATCH 10/11] Update src/flint/fmpz_mat.jl --- src/flint/fmpz_mat.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/flint/fmpz_mat.jl b/src/flint/fmpz_mat.jl index 369266aa9..59cea50c4 100644 --- a/src/flint/fmpz_mat.jl +++ b/src/flint/fmpz_mat.jl @@ -1844,7 +1844,7 @@ function sub!(z::ZZMatrixOrPtr, x::ZZMatrixOrPtr, y::ZZMatrixOrPtr) end function sub!(A::ZZMatrix, B::ZZMatrix, m::Int) - GC.@preserve A begin + GC.@preserve A B begin for i=1:nrows(A) A_p = Nemo.mat_entry_ptr(A, i, i) B_p = Nemo.mat_entry_ptr(B, i, i) From 27a930e4e885b5d2314103bf25efab73821d361c Mon Sep 17 00:00:00 2001 From: Claus Fieker Date: Mon, 10 Feb 2025 12:22:39 +0100 Subject: [PATCH 11/11] add a clear --- src/ZZMatrix-linalg.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ZZMatrix-linalg.jl b/src/ZZMatrix-linalg.jl index 32c6da90b..6e0256ab0 100644 --- a/src/ZZMatrix-linalg.jl +++ b/src/ZZMatrix-linalg.jl @@ -282,7 +282,7 @@ end #adaptive rational_reconstruction: if solution is unbalanced with #denominator smaller than numerator -function induce_rational_reconstruction(a::ZZMatrix, b::ZZRingElem) +function induce_rational_reconstruction(a::ZZMatrix, b::ZZRingElem; ErrorTolerant ::Bool = false) A = similar(a) T = ZZRingElem(Val(:raw)) @@ -754,6 +754,7 @@ function _renorm(U::ZZMatrix, m::ZZRingElem; start::Int = 1, last::Int = nrows(U end i += 1 end + Nemo._fmpz_clear_fn(t) return U end end