From 1c50638a654be886b5cebab01ac90c9db6cc25f3 Mon Sep 17 00:00:00 2001
From: Claus Fieker <fieker@mathematik.uni-kl.de>
Date: Fri, 7 Feb 2025 08:37:09 +0100
Subject: [PATCH 01/11] sanitize some interfaces

---
 src/matrix.jl | 93 ++++++++++++++++++++++++++-------------------------
 1 file changed, 48 insertions(+), 45 deletions(-)

diff --git a/src/matrix.jl b/src/matrix.jl
index d33c8691d..e39d832be 100644
--- a/src/matrix.jl
+++ b/src/matrix.jl
@@ -422,7 +422,7 @@ function _det(a::fpMatrix)
   return r
 end
 
-function map_entries!(a::fpMatrix, k::Nemo.fpField, A::ZZMatrix)
+function map_entries!(k::Nemo.fpField, a::fpMatrix, A::ZZMatrix)
   ccall((:nmod_mat_set_mod, Nemo.libflint), Cvoid, (Ref{fpMatrix}, UInt), a, k.n)
   ccall((:fmpz_mat_get_nmod_mat, Nemo.libflint), Cvoid, (Ref{fpMatrix}, Ref{ZZMatrix}), a, A)
   a.base_ring = k  # exploiting that the internal repr is the indep of char
@@ -433,22 +433,11 @@ function Base.copy!(A::ZZMatrix, B::ZZMatrix)
   ccall((:fmpz_mat_set, Nemo.libflint), Cvoid, (Ref{ZZMatrix}, Ref{ZZMatrix}), A, B)
 end
 
-function Nemo.sub!(A::ZZMatrix, m::Int)
+function sub!(A::ZZMatrix, B::ZZMatrix, m::Int)
   for i=1:nrows(A)
     A_p = Nemo.mat_entry_ptr(A, i, i)
-    sub!(A_p, A_p, m)
-  end
-  return A
-end
-
-# No allocations; also faster than *=
-function Nemo.mul!(A::ZZMatrix, m::ZZRingElem)
-  for i=1:nrows(A)
-    A_p = Nemo.mat_entry_ptr(A, i, 1)
-    for j=1:ncols(A)
-      mul!(A_p, A_p, m)
-      A_p += sizeof(Int)
-    end
+    B_p = Nemo.mat_entry_ptr(B, i, i)
+    sub!(A_p, B_p, m)
   end
   return A
 end
@@ -495,11 +484,12 @@ function is_unimodular(A::ZZMatrix; algorithm=:auto)
   A_mod_p = identity_matrix(Nemo.Native.GF(2), nrows(A))
   while nbits(M) <= target_bitsize_modulus
     @vprint(:UnimodVerif,2,".")
-    p = next_prime(p + rand(1:2^10))  # increment by a random step to a prime
+    p = next_prime(p + rand(1:2^10), false) # increment by a random step to a prime
     ZZmodP = Nemo.Native.GF(p; cached = false, check = false)
-    map_entries!(A_mod_p, ZZmodP, A)
-    @vtime :UnimodVerif 2   det_mod_p = Int(_det(A_mod_p))
-    if det_mod_p > p/2
+    map_entries!(ZZmodP, A_mod_p, A)
+    @vtime :UnimodVerif 2  det_mod_p = Int(_det(A_mod_p))
+    p2 = p>>1
+    if det_mod_p > p2
       det_mod_p -= p
     end
     if abs(det_mod_p) != 1
@@ -511,12 +501,11 @@ function is_unimodular(A::ZZMatrix; algorithm=:auto)
     det_mod_m = det_mod_p
     mul!(M, M, p)
   end
-  return is_unimodular_given_det_mod_m(A, det_mod_m, M; algorithm=algorithm)
+  return _is_unimodular_given_det_mod_m(A, det_mod_m, M; algorithm)
 end #function
 
 
-# THIS FUNCTION NOT OFFICIALLY DOCUMENTED -- not intended for public use.
-function is_unimodular_given_det_mod_m(A::ZZMatrix, det_mod_m::Int, M::ZZRingElem; algorithm=:auto)
+function _is_unimodular_given_det_mod_m(A::ZZMatrix, det_mod_m::Int, M::ZZRingElem; algorithm=:auto)
   # This UI is convenient for our sophisticated determinant algorithm.
   # We estimate cost of computing det by CRT and cost of doing Pauderis-Storjohann
   # unimodular verification then call whichever is likely to be faster
@@ -533,7 +522,7 @@ function is_unimodular_given_det_mod_m(A::ZZMatrix, det_mod_m::Int, M::ZZRingEle
   @vprintln(:UnimodVerif,1,"is_unimodular_given_det_mod_m starting")
   if algorithm == :pauderis_storjohann
     @vprintln(:UnimodVerif,1,"User specified Pauderis_Storjohann --> delegating")
-    return is_unimodular_Pauderis_Storjohann_Hensel(A)
+    return _is_unimodular_Pauderis_Storjohann_Hensel(A)
   end
   n = ncols(A)
   Hrow = hadamard_bound2(A)
@@ -557,7 +546,7 @@ function is_unimodular_given_det_mod_m(A::ZZMatrix, det_mod_m::Int, M::ZZRingEle
     if Estimate_PS < Estimate_CRT #=&& Space_estimate_PS < 2^32=#
       # Expected RAM requirement is not excessive, and
       # Pauderis-Storjohann is likely faster, so delegate to UniCertZ_hensel
-      return is_unimodular_Pauderis_Storjohann_Hensel(A)
+      return _is_unimodular_Pauderis_Storjohann_Hensel(A)
     end
   end
   if algorithm == :CRT
@@ -578,13 +567,13 @@ function is_unimodular_given_det_mod_m(A::ZZMatrix, det_mod_m::Int, M::ZZRingEle
     end
     # advance to another prime which does not divide M:
     while true
-      p = next_prime(p+rand(1:stride))
+      p = next_prime(p+rand(1:stride), false)
       if !is_divisible_by(M,p)
         break
       end
     end
     ZZmodP = Nemo.Native.GF(p; cached = false, check = false)
-    map_entries!(A_mod_p, ZZmodP, A)
+    map_entries!(ZZmodP, A_mod_p, A)
     det_mod_p = _det(A_mod_p)
     if det_mod_p > p/2
       det_mod_p -= p
@@ -592,19 +581,24 @@ function is_unimodular_given_det_mod_m(A::ZZMatrix, det_mod_m::Int, M::ZZRingEle
     if abs(det_mod_p) != 1 || det_mod_m != det_mod_p
       return false  # obviously not unimodular
     end
-    M *= p
+    mul!(M, M, p)
   end
   return true
 end
 
-# THIS FUNCTION NOT OFFICIALLY DOCUMENTED -- not intended for public use.
-# Pauderis-Storjohann unimodular verification/certification.
-# We use Hensel lifting to obtain the modular inverse B0
-# Seems to be faster than the CRT prototype (not incl. here)
-# VERBOSITY via :UnimodVerif
-function is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix)
+#DoublePlusOne complex ##############################
+
+#TODO: Hensel is slower than CRT if modulus is small(ish)
+
+function _PSH_init(A::ZZMatrix)
+  #computes a 
+  # modulus (X) in the paper
+  # the inverse of A mod x
+  # the exponent target for the lifting.
+
   n = nrows(A)
-  # assume ncols == n
+  @assert ncols(A) == n
+
   EntrySize = maximum(abs, A)
   entry_size_bits = maximum(nbits, A)
   e = max(16, 2+ceil(Int, 2*log2(n)+entry_size_bits))
@@ -618,8 +612,7 @@ function is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix)
   @vprintln(:UnimodVerif,1,"Hensel prime is p = $(p)")
   m = ZZ(p)
   ZZmodP = Nemo.Native.GF(p)
-  MatModP = matrix_space(ZZmodP,n,n)
-  B0 = lift(inv(MatModP(A)))
+  B0 = lift(inv(map_entries(ZZmodP, A))) #produces pos. lift
   mod_sym!(B0, m)
   delta = identity_matrix(base_ring(A), n)
   A_mod_m = identity_matrix(base_ring(A), n)
@@ -628,12 +621,12 @@ function is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix)
     @vprintln(:UnimodVerif,2,"Loop 1: nbits(m) = $(nbits(m))");
     copy!(A_mod_m, A)
     mm = m^2
-    mod_sym!(A_mod_m,mm)
+    mod_sym!(A_mod_m, mm)
     @vtime :UnimodVerif 2  mul!(delta, A_mod_m, B0)
-    sub!(delta, 1)
+    sub!(delta, delta, 1)
     divexact!(delta, m) # to make matrix product in line below cheaper
     @vtime :UnimodVerif 2 mul!(A_mod_m, B0, delta)
-    mul!(A_mod_m, m)
+    mul!(A_mod_m, A_mod_m, m)
     sub!(B0, B0, A_mod_m)
     m = mm
     mod_sym!(B0, m)
@@ -648,11 +641,23 @@ function is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix)
   k = 2+nbits(ceil(Int,k/log2(m)))
   @vprintln(:UnimodVerif,1,"Stage 2: max num iters is k=$(k)")
 
-  ZZmodM,_ = residue_ring(ZZ,m; cached = false)  # m is probably NOT prime
+  return m, B0, k
+end
+
+
+# THIS FUNCTION NOT OFFICIALLY DOCUMENTED -- not intended for public use.
+# Pauderis-Storjohann unimodular verification/certification.
+# We use Hensel lifting to obtain the modular inverse B0
+# Seems to be faster than the CRT prototype (not incl. here)
+# VERBOSITY via :UnimodVerif
+function _is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix)
+  n = nrows(A)
+  m, B0, k = _PSH_init(A)
+
   ##  @assert is_one(lift(MatModM(B0*A)))
   # Originally: R = (I-A*B0)/m
   R = A*B0
-  sub!(R, 1) #this is -R, but is_zero test is the same, and the loop starts by squaring
+  sub!(R, R, 1) #this is -R, but is_zero test is the same, and the loop starts by squaring
   divexact!(R, m)
   if is_zero(R)
     return true
@@ -661,9 +666,7 @@ function is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix)
   #ZZMatrix and ZZModMatrix are the same type. The modulus is
   #always passed in as an extra argument when needed...
   #mul! for ZZModMatrix is mul, followed by reduce.
-
-  B0_modm = deepcopy(B0)
-  mod_sym!(B0_modm, m)
+  #thus the code is not using ZZModMatrices at all...
 
   R_bar = zero_matrix(ZZ, n, n)
   M = zero_matrix(ZZ, n, n)
@@ -675,7 +678,7 @@ function is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix)
     mul!(R_bar, R, R)
     #Next 3 lines do:  M = lift(B0_modm*MatModM(R_bar));
     mod_sym!(R_bar, m)
-    mul!(M, B0_modm, R_bar)
+    mul!(M, B0, R_bar)
     mod_sym!(M, m)
     # Next 3 lines do: R = (R_bar - A*M)/m; but with less memory allocation
     mul!(R, A, M)

From 54f71dc3bb515b5d66de5c4a56db9896ce8538c4 Mon Sep 17 00:00:00 2001
From: Claus Fieker <fieker@mathematik.uni-kl.de>
Date: Fri, 7 Feb 2025 15:05:30 +0100
Subject: [PATCH 02/11] move the unimodularity around and add solvers

started ZZMatrix-linalg with
  - is_unimodular
  - dixon/ solve/ init
  - DoublePlusOne solver
added some ZZ/ZZMatrix functions
---
 src/Nemo.jl                  |   1 +
 src/ZZMatrix-linalg.jl       | 978 +++++++++++++++++++++++++++++++++++
 src/flint/fmpz.jl            |  10 +
 src/flint/fmpz_mat.jl        |  23 +
 src/matrix.jl                | 295 -----------
 test/Nemo-test.jl            |   2 +
 test/ZZMatrix-linalg-test.jl |  30 ++
 7 files changed, 1044 insertions(+), 295 deletions(-)
 create mode 100644 src/ZZMatrix-linalg.jl
 create mode 100644 test/ZZMatrix-linalg-test.jl

diff --git a/src/Nemo.jl b/src/Nemo.jl
index 4b3fef149..e283d7b1a 100644
--- a/src/Nemo.jl
+++ b/src/Nemo.jl
@@ -433,6 +433,7 @@ include("Rings.jl")
 
 include("matrix.jl")
 include("poly.jl")
+include("ZZMatrix-linalg.jl")
 
 include("Infinity.jl")
 
diff --git a/src/ZZMatrix-linalg.jl b/src/ZZMatrix-linalg.jl
new file mode 100644
index 000000000..3ed7444bf
--- /dev/null
+++ b/src/ZZMatrix-linalg.jl
@@ -0,0 +1,978 @@
+##################################################################
+## Functions related to unimodularity verification
+##################################################################
+
+# ***SOURCE ALGORITHM***
+# Refined implementation of method from
+# Authors: C. Pauderis + A. Storjohann
+# Title:   Detrministic Unimodularity Certification
+# Where:   Proc ISSAC 2012, pp. 281--287
+# See also: thesis of Colton Pauderis, 2013, Univ of Waterloo
+#           Deterministic Unimodularity Certification and Applications for Integer Matrices
+
+
+# add_verbosity_scope(:UnimodVerif) -- see func __init__ in src/Nemo.jl
+
+#######################################################
+# Low-level auxiliary functions -- should be elsewhere?
+#######################################################
+
+function _det(a::fpMatrix)
+  a.r < 10 && return lift(det(a))
+  #_det avoids a copy: det is computed destructively
+  r = ccall((:_nmod_mat_det, Nemo.libflint), UInt, (Ref{fpMatrix}, ), a)
+  return r
+end
+
+function map_entries!(k::Nemo.fpField, a::fpMatrix, A::ZZMatrix)
+  ccall((:nmod_mat_set_mod, Nemo.libflint), Cvoid, (Ref{fpMatrix}, UInt), a, k.n)
+  ccall((:fmpz_mat_get_nmod_mat, Nemo.libflint), Cvoid, (Ref{fpMatrix}, Ref{ZZMatrix}), a, A)
+  a.base_ring = k  # exploiting that the internal repr is the indep of char
+  return a
+end
+
+@doc raw"""
+    is_unimodular(A::ZZMatrix)
+    is_unimodular(A::ZZMatrix; algorithm=:auto)
+
+Determine whether `A` is unimodular, i.e. whether `abs(det(A)) == 1`.
+The method used is either that of Pauderis--Storjohann or using CRT;
+the choice is made based on cost estimates for the two approaches.
+
+The kwarg `algorithm` may be specified to be `:CRT` or `:pauderis_storjohann`
+to ensure that the corresponding underlying algorithm is used (after a quick
+initial unimodularity check).  `:CRT` is better if the matrix is unimodular
+and has an inverse containing large entries (or if it is not unimodular);
+`:pauderis_storjohann` is asymptotically faster when the matrix is unimodular
+and its inverse does not contain large entries, but it requires more space
+for intermediate expressions.
+"""
+function is_unimodular(A::ZZMatrix; algorithm=:auto)
+  # Call this function when no extra info about the matrix is available.
+  # It does a preliminary check that det(A) is +/-1 modulo roughly 2^100.
+  # If so, then delegate the complete check to is_unimodular_given_det_mod_m
+  if !is_square(A)
+    throw(ArgumentError("Matrix must be square"))
+  end
+  if !(algorithm in [:auto, :CRT, :pauderis_storjohann])
+    throw(ArgumentError("algorithm must be one of [:CRT, :pauderis_storjohann, :auto]"))
+  end
+  # Deal with two trivial cases
+  if nrows(A) == 0
+    return true
+  end
+  if nrows(A) == 1
+    return (abs(A[1,1]) == 1);
+  end
+  # Compute det mod M -- may later include some more primes
+  target_bitsize_modulus = 100 # magic number -- should not be too small!
+  @vprintln(:UnimodVerif,1,"Quick first check: compute det by crt up to modulus with about $(target_bitsize_modulus) bits")
+  p::Int = 2^20 # start point of primes for initial CRT trial -- det with modulus >= 2^22 is much slower
+  M = ZZ(1)
+  det_mod_m = 0  # 0 means uninitialized, o/w value is 1 or -1
+  A_mod_p = identity_matrix(Nemo.Native.GF(2), nrows(A))
+  while nbits(M) <= target_bitsize_modulus
+    @vprint(:UnimodVerif,2,".")
+    p = next_prime(p + rand(1:2^10), false) # increment by a random step to a prime
+    ZZmodP = Nemo.Native.GF(p; cached = false, check = false)
+    map_entries!(ZZmodP, A_mod_p, A)
+    @vtime :UnimodVerif 2  det_mod_p = Int(_det(A_mod_p))
+    p2 = p>>1
+    if det_mod_p > p2
+      det_mod_p -= p
+    end
+    if abs(det_mod_p) != 1
+      return false  # obviously not unimodular
+    end
+    if det_mod_m != 0 && det_mod_m != det_mod_p
+      return false  # obviously not unimodular
+    end
+    det_mod_m = det_mod_p
+    mul!(M, M, p)
+  end
+  return _is_unimodular_given_det_mod_m(A, det_mod_m, M; algorithm)
+end #function
+
+
+function _is_unimodular_given_det_mod_m(A::ZZMatrix, det_mod_m::Int, M::ZZRingElem; algorithm=:auto)
+  # This UI is convenient for our sophisticated determinant algorithm.
+  # We estimate cost of computing det by CRT and cost of doing Pauderis-Storjohann
+  # unimodular verification then call whichever is likely to be faster
+  # (but we avoid P-S if the space estimate is too large).
+  # M is a modulus satisfying M > 2^30;
+  # det_mod_m is det(A) mod M [!not checked!]: it is either +1 or -1.
+  is_square(A) || throw(ArgumentError("Matrix must be square"))
+  abs(det_mod_m) == 1 || throw(ArgumentError("det_mod_m must be +1 or -1"))
+  M >  2^30 || throw(ArgumentError("modulus must be greater than 2^30"))
+  (algorithm in [:auto, :CRT, :pauderis_storjohann]) || throw(ArgumentError("algorithm must be one of [:CRT, :pauderis_storjohann, :auto]"))
+  # Deal with two trivial cases -- does this make sense here???
+  nrows(A) == 0 && return true
+  nrows(A) == 1 && return (abs(A[1,1]) == 1)
+  @vprintln(:UnimodVerif,1,"is_unimodular_given_det_mod_m starting")
+  if algorithm == :pauderis_storjohann
+    @vprintln(:UnimodVerif,1,"User specified Pauderis_Storjohann --> delegating")
+    return _is_unimodular_Pauderis_Storjohann_Hensel(A)
+  end
+  n = ncols(A)
+  Hrow = hadamard_bound2(A)
+  Hcol = hadamard_bound2(transpose(A))
+  DetBoundSq = min(Hrow, Hcol)
+  Hbits = 1+div(nbits(DetBoundSq),2)
+  @vprintln(:UnimodVerif,1,"Hadamard bound in bits $(Hbits)")
+  if algorithm == :auto
+    # Estimate whether better to "climb out" with CRT or use Pauderis-Storjohann
+    CRT_speed_factor = 20.0  # scale factor is JUST A GUESS !!!  (how much faster CRT is compared to P-S)
+    total_entry_size = sum(nbits,A)
+    Estimate_CRT = ceil(((Hbits - nbits(M))*(2.5+total_entry_size/n^3))/CRT_speed_factor)  # total_entry_size/n is proportional to cost of reducing entries mod p
+    EntrySize = maximum(abs, A)
+    entry_size_bits = maximum(nbits, A)
+    @vprintln(:UnimodVerif,1,"entry_size_bits = $(entry_size_bits)   log2(EntrySize) = $(ceil(log2(EntrySize)))")
+    e = max(16, Int(2+ceil(2*log2(n)+log2(EntrySize))))  # see Condition in Fig 1, p.284 of Pauderis-Storjohann
+    Estimate_PS = ceil(e*log(e)) # assumes soft-linear ZZ arith
+    @vprintln(:UnimodVerif,1,"Est_CRT = $(Estimate_CRT)  and  Est_PS = $(Estimate_PS)")
+    Space_estimate_PS = ZZ(n)^2 * e  # nbits of values, ignoring mem mgt overhead
+    @vprintln(:UnimodVerif,1,"Space est PS: $(Space_estimate_PS)  [might be bytes]")
+    if Estimate_PS < Estimate_CRT #=&& Space_estimate_PS < 2^32=#
+      # Expected RAM requirement is not excessive, and
+      # Pauderis-Storjohann is likely faster, so delegate to UniCertZ_hensel
+      return _is_unimodular_Pauderis_Storjohann_Hensel(A)
+    end
+  end
+  if algorithm == :CRT
+    @vprintln(:UnimodVerif,1,"User specified CRT --> delegating")
+  end
+  @vprintln(:UnimodVerif,1,"UnimodularVerification: CRT loop")
+  # Climb out with CRT:
+  # in CRT loop start with 22 bit primes; if we reach threshold (empirical), jump to much larger primes.
+  p = 2^21; stride = 2^10; threshold = 5000000;
+  A_mod_p = zero_matrix(Nemo.Native.GF(2), nrows(A), ncols(A))
+  while nbits(M) < Hbits
+    @vprint(:UnimodVerif,2,".")
+    # Next lines increment p (by random step up to stride) to a prime not dividing M
+    if p > threshold && p < threshold+stride
+      @vprint(:UnimodVerif,2,"!!")
+      p = 2^56; stride = 2^25;  # p = 2^56 is a good choice on my standard 64-bit machine
+      continue
+    end
+    # advance to another prime which does not divide M:
+    while true
+      p = next_prime(p+rand(1:stride), false)
+      if !is_divisible_by(M,p)
+        break
+      end
+    end
+    ZZmodP = Nemo.Native.GF(p; cached = false, check = false)
+    map_entries!(ZZmodP, A_mod_p, A)
+    det_mod_p = _det(A_mod_p)
+    if det_mod_p > p/2
+      det_mod_p -= p
+    end
+    if abs(det_mod_p) != 1 || det_mod_m != det_mod_p
+      return false  # obviously not unimodular
+    end
+    mul!(M, M, p)
+  end
+  return true
+end
+
+#DoublePlusOne complex ##############################
+
+#TODO: Hensel is slower than CRT if modulus is small(ish)
+
+function _PSH_init(A::ZZMatrix)
+  #computes a 
+  # modulus (X) in the paper
+  # the inverse of A mod x
+  # the exponent target for the lifting.
+
+  n = nrows(A)
+  @assert ncols(A) == n
+
+  EntrySize = maximum(abs, A)
+  entry_size_bits = maximum(nbits, A)
+  e = max(16, 2+ceil(Int, 2*log2(n)+entry_size_bits))
+  ModulusLWB = ZZ(2)^e
+  @vprintln(:UnimodVerif,1,"ModulusLWB is 2^$(e)")
+  # Now look for prime: about 25 bits, and such that p^(2^sthg) is
+  # just slightly bigger than ModulusLWB.  Bit-size of prime matters little.
+  root_order = exp2(ceil(log2(e/25)))
+  j = ceil(exp2(e/root_order))
+  p = next_prime(Int(j))
+  @vprintln(:UnimodVerif,1,"Hensel prime is p = $(p)")
+  m = ZZ(p)
+  ZZmodP = Nemo.Native.GF(p)
+  B0 = lift(inv(map_entries(ZZmodP, A))) #produces pos. lift
+  mod_sym!(B0, m)
+  delta = identity_matrix(base_ring(A), n)
+  A_mod_m = identity_matrix(base_ring(A), n)
+  @vprintln(:UnimodVerif,1,"Starting stage 1 lifting")
+  while (m < ModulusLWB)
+    @vprintln(:UnimodVerif,2,"Loop 1: nbits(m) = $(nbits(m))");
+    copy!(A_mod_m, A)
+    mm = m^2
+    mod_sym!(A_mod_m, mm)
+    @vtime :UnimodVerif 2  mul!(delta, A_mod_m, B0)
+    sub!(delta, delta, 1)
+    divexact!(delta, m) # to make matrix product in line below cheaper
+    @vtime :UnimodVerif 2 mul!(A_mod_m, B0, delta)
+    mul!(A_mod_m, A_mod_m, m)
+    sub!(B0, B0, A_mod_m)
+    m = mm
+    mod_sym!(B0, m)
+  end
+  @vprintln(:UnimodVerif,1,"Got stage 1 modular inverse: modulus has $(nbits(m)) bits; value is $(p)^$(Int(round(log2(m)/log2(p))))")
+
+  # Hadamard bound brings little benefit; anyway, almost never lift all the way
+###  H = min(hadamard_bound2(A), hadamard_bound2(transpose(A)))
+###  println("nbits(H) = $(nbits(H))");
+  # Compute max num iters in k
+  k = (n-1)*(entry_size_bits+log2(n)/2) - (2*log2(n) + entry_size_bits -1)
+  k = 2+nbits(ceil(Int,k/log2(m)))
+  @vprintln(:UnimodVerif,1,"Stage 2: max num iters is k=$(k)")
+
+  return m, B0, k
+end
+
+
+# THIS FUNCTION NOT OFFICIALLY DOCUMENTED -- not intended for public use.
+# Pauderis-Storjohann unimodular verification/certification.
+# We use Hensel lifting to obtain the modular inverse B0
+# Seems to be faster than the CRT prototype (not incl. here)
+# VERBOSITY via :UnimodVerif
+function _is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix)
+  n = nrows(A)
+  m, B0, k = _PSH_init(A)
+
+  ##  @assert is_one(lift(MatModM(B0*A)))
+  # Originally: R = (I-A*B0)/m
+  R = A*B0
+  sub!(R, R, 1) #this is -R, but is_zero test is the same, and the loop starts by squaring
+  divexact!(R, m)
+  if is_zero(R)
+    return true
+  end
+
+  #ZZMatrix and ZZModMatrix are the same type. The modulus is
+  #always passed in as an extra argument when needed...
+  #mul! for ZZModMatrix is mul, followed by reduce.
+  #thus the code is not using ZZModMatrices at all...
+
+  R_bar = zero_matrix(ZZ, n, n)
+  M = zero_matrix(ZZ, n, n)
+
+  @vprintln(:UnimodVerif,1,"Starting stage 2 lifting")
+  for i in 0:k-1
+    @vprintln(:UnimodVerif,1,"Stage 2 loop: i=$(i)")
+
+    mul!(R_bar, R, R)
+    #Next 3 lines do:  M = lift(B0_modm*MatModM(R_bar));
+    mod_sym!(R_bar, m)
+    mul!(M, B0, R_bar)
+    mod_sym!(M, m)
+    # Next 3 lines do: R = (R_bar - A*M)/m; but with less memory allocation
+    mul!(R, A, M)
+    sub!(R, R_bar, R)
+    divexact!(R, m)
+    if is_zero(R)
+      return true
+    end
+  end
+  return false
+end
+
+#adaptive rational_reconstruction: if solution is unbalanced with
+#denominator smaller than numerator
+function induce_rational_reconstruction(a::ZZMatrix, b::ZZRingElem)
+  A = similar(a)
+
+  T = ZZRingElem(Val(:raw))
+  n = ZZRingElem(Val(:raw))
+  d = ZZRingElem(Val(:raw))
+  bN = ZZRingElem(Val(:raw))
+  bD = ZZRingElem(Val(:raw))
+  D = ZZRingElem(1)
+  for i=1:nrows(a)
+    a_ptr = Nemo.mat_entry_ptr(a, i, 1)
+    A_ptr = Nemo.mat_entry_ptr(A, i, 1)
+    for j=1:ncols(a)
+      Nemo.set!(T, a_ptr)
+      Nemo.mul!(T, T, D)
+      Nemo.mod!(T, T, b)
+      fl = ratrec!(n, d, T, b, bN, bD)
+      fl || return fl, A, D
+      if !isone(d)
+        mul!(D, D, d)
+        Nemo.mul!(A, A, d)
+      end
+      Nemo.set!(A_ptr, n)
+
+      a_ptr += sizeof(ZZRingElem)
+      A_ptr += sizeof(ZZRingElem)
+    end
+  end
+  Nemo._fmpz_clear_fn(bN)
+  Nemo._fmpz_clear_fn(bD)
+  Nemo._fmpz_clear_fn(n)
+  Nemo._fmpz_clear_fn(d)
+  Nemo._fmpz_clear_fn(T)
+
+  return true, A, D
+end
+
+#output sensitive rational_reconstruction, in particular if
+#numerator is larger than den 
+function ratrec!(n::ZZRingElem, d::ZZRingElem, a::ZZRingElem, b::ZZRingElem, N::ZZRingElem = ZZ(), D::ZZRingElem= ZZ())
+  k = nbits(b)
+  l = 1
+  set!(N, b)
+  set!(D, 2)
+
+#  @assert 0<a<b
+  done = false
+  while !done && D <= N
+    Nemo.mul!(D, D, D)
+    tdiv_q!(N, b, D)
+    shift_right!(N, N, 1)
+    if D>N
+      @ccall Nemo.libflint.fmpz_root(N::Ref{ZZRingElem}, b::Ref{ZZRingElem}, 2::Int)::Nothing
+      shift_right!(D, N, 1)
+      done = true
+    end
+
+#    @assert 2*N*D < b
+
+    fl = ccall((:_fmpq_reconstruct_fmpz_2, Nemo.libflint), Bool, (Ref{ZZRingElem}, Ref{ZZRingElem}, Ref{ZZRingElem}, Ref{ZZRingElem}, Ref{ZZRingElem}, Ref{ZZRingElem}), n, d, a, b, N, D)
+
+    if fl && (nbits(n)+nbits(d) < k - 30 || D>N)
+      return fl
+    end
+    l += 1
+  end
+  return false
+end
+
+function change_prime!(a::fpMatrix, p::UInt)
+  @ccall libflint.nmod_mat_set_mod(a::Ref{fpMatrix}, p::UInt)::Nothing
+end
+
+function lift!(A::ZZMatrix, a::fpMatrix)
+  ccall((:fmpz_mat_set_nmod_mat, Nemo.libflint), Cvoid, (Ref{ZZMatrix}, Ref{fpMatrix}), A, a)
+end
+
+mutable struct DixonCtx
+  Ainv::fpMatrix
+  p::UInt
+  crt_primes::Vector{UInt}
+  A_mod::Vector{fpMatrix}
+  d_mod::fpMatrix
+  y_mod::fpMatrix
+  Ay_mod::fpMatrix
+  Ay::ZZMatrix
+  x::ZZMatrix
+  d::ZZMatrix
+  A::ZZMatrix
+  function DixonCtx()
+    return new()
+  end
+end
+
+#copied from flint to allow the use of adaptive reconstruction,
+function dixon_init(A::ZZMatrix, B::ZZMatrix; side::Symbol = :right)
+  return dixon_init!(DixonCtx(), A, B; side)
+end
+
+function dixon_init!(D::DixonCtx, A::ZZMatrix, B::ZZMatrix; side::Symbol = :right)
+
+  if side == :right
+    @assert nrows(B) == ncols(A)
+  else
+    @assert ncols(B) == nrows(A)
+  end
+
+  if isdefined(D, :A)
+    @assert size(A) == size(D.A)
+  end
+
+  if isdefined(D, :x)
+    @assert size(D.x) == size(B)
+  end
+
+  if side == :right
+    if isdefined(D, :A)
+      copy!(D.A, A)
+    else
+      D.A = A
+    end
+    ncB = ncols(B)
+  else
+    if isdefined(D, :A)
+      transpose!(D.A, A)
+    else
+      D.A = transpose(A)
+    end
+    ncB = nrows(B)
+  end
+
+  n = nrows(D.A)
+
+  p = next_prime(2^59, false)
+  while true
+    if isdefined(D, :Ainv)
+      map_entries!(Native.GF(p, cached = false, check = false), D.Ainv, D.A)
+    else
+      D.Ainv = map_entries(Native.GF(p, cached = false, check = false), D.A)
+    end
+    try
+      D.Ainv = inv(D.Ainv)
+      break
+    catch e
+      if e != ErrorException || e.msg != "Matrix not invertible"
+        rethrow(e)
+      end
+    end
+    p = next_prime(p, false)
+  end
+  @assert p != 0
+  D.p = p
+
+  if !isdefined(D, :crt_primes)
+    D.crt_primes = UInt[]
+    D.A_mod = fpMatrix[]
+  end
+
+  k = Nemo.fpField(UInt(p), false)
+  if !isdefined(D, :d_mod)
+    D.d_mod = zero_matrix(k, n, ncB)
+    D.y_mod = zero_matrix(k, n, ncB)
+    D.Ay_mod = zero_matrix(k, n, ncB)
+    D.Ay = zero_matrix(ZZ, n, ncB)
+    D.x = zero_matrix(ZZ, n, ncB)
+  end
+
+  if ncB == 1
+    return D
+  end
+  
+  pr = ZZRingElem(1)
+  xp = next_prime(p, false)
+  mA = maximum(abs, A)
+  maxA = mA *(p-1)*n*2
+
+  while true
+    push!(D.crt_primes, xp)
+    push!(D.A_mod, map_entries(Nemo.fpField(UInt(xp), false), A))
+    mul!(pr, pr, xp)
+    if pr > maxA
+      break
+    end
+    xp = next_prime(xp, false)
+  end
+
+  return D
+end
+
+function dixon_solve(D::DixonCtx, B::ZZMatrix; side::Symbol = :right, block::Int = 10) 
+  if side == :right
+    d = deepcopy(B)
+    _B = B
+  else
+    d = transpose(B)
+    _B = transpose(B) 
+  end
+  @assert ncols(D.A) == nrows(d)
+  @assert size(D.x) == size(d)
+  #we're solveing Ax=B, always. if side = :left, then the interface transposes
+  zero!(D.x)
+ 
+  mA = maximum(abs, D.A)
+  mB = maximum(abs, B)
+  n = nrows(D.A)
+  #bound for the solution is hadarmat/ cramer
+  bound = max((n-1)*mA^2+mB^2, n*mA^2)^n * 2^30
+
+  ppow = ZZRingElem(1)
+  i = 1
+  nexti = 1
+  while ppow <= bound
+    map_entries!(Nemo.fpField(D.p, false), D.d_mod, d)
+
+    Nemo.mul!(D.y_mod, D.Ainv, D.d_mod)
+    ccall((:fmpz_mat_scalar_addmul_nmod_mat_fmpz, Nemo.libflint), Cvoid, (Ref{ZZMatrix}, Ref{fpMatrix}, Ref{ZZRingElem}), D.x, D.y_mod, ppow)
+
+    Nemo.mul!(ppow, ppow, D.p)
+    
+    if ppow > bound
+      break
+    end
+
+    stabilised = i == nexti;
+    i += 1
+
+    if stabilised
+      nexti = ceil(Int,(i*1.4)) + 1;
+      #TODO: maybe col by col? to stop doing cols that are already there?
+      #main use currently is 1 col anyway
+      fl, num, den = induce_rational_reconstruction(D.x, ppow)
+
+      if fl
+#        @show fl = (D.A*num == den*_B)
+         sz = max(maximum(nbits, D.A) + maximum(nbits, num)
+                                     + nbits(ncols(_B)) + 1,
+                 maximum(nbits, _B) + nbits(den))
+
+        if sz < nbits(ppow)
+           #we know the product is correct as the p-adic precision
+           #was high enough already...
+#          @show "no prod neccessary"
+        else
+          xp = next_prime(D.p)
+#          @show ceil(Int, (sz - nbits(ppow))/60)
+          local Ap::fpMatrix
+          local Bp::fpMatrix
+          local nump::fpMatrix
+          local lhs::fpMatrix
+          local rhs::fpMatrix
+          for i=1:ceil(Int, (sz - nbits(ppow))/60)
+            k = Nemo.fpField(xp, false)
+            if i == 1
+              Ap = map_entries(k, D.A)
+              Bp = map_entries(k, _B)
+              nump = map_entries(k, num)
+              lhs = Ap*nump
+              rhs = Bp*k(den)
+            else
+              map_entries!(Ap, k, D.A)
+              map_entries!(Bp, k, _B)
+              map_entries!(nump, k, num)
+              mul!(lhs, Ap, nump)
+              mul!(rhs, Bp, k(den))
+            end
+            fl = lhs == rhs
+            fl || break
+            xp = next_prime(xp)
+          end
+        end
+
+        if side == :right
+          fl && return num, den
+        else
+          fl && return transpose(num), den
+        end
+      end
+    end
+
+    if ncols(_B) == 1
+      n = nrows(D.A)
+      for i=1:n
+        Ay_ptr = mat_entry_ptr(D.Ay, i, 1)
+        A_ptr = mat_entry_ptr(D.A, i, 1)
+        d_ptr = mat_entry_ptr(d, i, 1)
+        zero!(Ay_ptr)
+        for j=1:n
+          y_ptr = Nemo.mat_entry_ptr(D.y_mod, j, 1)
+          addmul!(Ay_ptr, A_ptr, unsafe_load(y_ptr))
+          A_ptr += sizeof(ZZRingElem)
+        end
+        sub!(d_ptr, d_ptr, Ay_ptr)
+      end
+    else
+      C = CrtCtx_Mat(block)
+      for j=1:length(D.crt_primes)
+        change_prime!(D.y_mod, D.crt_primes[j])
+        change_prime!(D.Ay_mod, D.crt_primes[j])
+
+        mul!(D.Ay_mod, D.A_mod[j], D.y_mod)
+        push!(C, D.Ay_mod)
+      end
+      change_prime!(D.y_mod, D.p)
+      finish(C)
+      Nemo.sub!(d, d, finish(C))
+    end
+    divexact!(d, d, ZZ(D.p))
+  end
+  fl, num, den = induce_rational_reconstruction(D.x, ppow)
+  @assert fl
+
+  if side == :right
+    return num, den
+  else
+    return transpose(num), den
+  end
+end
+
+function dixon_solve(A::ZZMatrix, B::ZZMatrix; side::Symbol = :right)
+  d = dixon_init(A, B; side)
+  return dixon_solve(d, B; side)
+end
+
+#iterative CRT for matrices
+#will use flint (induce_crt!) for blocks of ?? matrices and then switch
+#to some tree-based asymptotically fast version.
+
+function induce_crt!(A::ZZMatrix, p::ZZRingElem, B::fpMatrix; signed::Bool = false)
+  #the second modulus is implicit in B: B.n
+
+  ccall((:fmpz_mat_CRT_ui, Nemo.libflint), Cvoid, (Ref{ZZMatrix}, Ref{ZZMatrix}, Ref{ZZRingElem}, Ref{fpMatrix}, Int), A, A, p, B, signed)
+  Nemo.mul!(p, p, B.n)
+  return
+end
+
+mutable struct CrtCtx_Mat
+  c::Int #how many primes before tree
+
+  d::Vector{ZZRingElem} #the product tree so far
+  M::Vector{ZZMatrix}
+
+  pos::Int
+  cc::Int
+
+  function CrtCtx_Mat(c::Int = 1)
+    return new(c, ZZRingElem[], ZZMatrix[], 0, 0)
+  end
+end
+
+function Base.push!(C::CrtCtx_Mat, a::fpMatrix)
+  if C.pos == 0
+    push!(C.M, lift(a))
+    push!(C.d, ZZ(a.n))
+    C.cc = 1
+    C.pos += 1
+    return
+  end
+  pos = C.pos
+  if C.cc == 0
+    if pos > length(C.M)
+      push!(C.M, lift(a))
+      push!(C.d, ZZ(a.n))
+    else
+      C.M[pos] = lift(a)
+      C.d[pos] = ZZ(a.n)
+    end
+    C.cc = 1
+  else
+    induce_crt!(C.M[C.pos], C.d[C.pos], a; signed = true)
+    C.cc += 1
+  end
+  if C.cc >= C.c #full...
+    while pos > 1 && C.d[pos-1] < C.d[pos] #assuming primes are in order
+      g, e, f = gcdx(C.d[pos-1], C.d[pos])
+      @assert isone(g)
+      mul!(C.M[pos-1], f*C.d[pos])
+      mul!(C.M[pos], e*C.d[pos-1])
+      add!(C.M[pos-1], C.M[pos-1], C.M[pos])
+      C.d[pos-1] *= C.d[pos]
+      #@show maximum(nbits, C.M[pos-1])
+      mod_sym!(C.M[pos-1], C.d[pos-1])
+      pos -= 1
+    end
+    C.pos = pos + 1
+    C.cc = 0
+  end
+end
+
+function finish(C::CrtCtx_Mat)
+  pos = C.pos
+  if C.cc == 0
+    pos -= 1
+  end
+  while pos > 1
+    g, e, f = gcdx(C.d[pos-1], C.d[pos])
+    @assert isone(g)
+    mul!(C.M[pos-1], f*C.d[pos])
+    mul!(C.M[pos], e*C.d[pos-1])
+    add!(C.M[pos-1], C.M[pos-1], C.M[pos])
+    C.d[pos-1] *= C.d[pos]
+#    @show maximum(nbits, C.M[pos-1]), nbits(C.d[pos-1])
+    mod_sym!(C.M[pos-1], C.d[pos-1])
+    pos -= 1
+  end
+  return C.M[1]
+end
+
+
+####################################################################
+# DoublePlusOne Solver
+####################################################################
+
+#U represents a 1 x n ZZ matrix in base m
+# each row is a single (symmetric) p-adic digit
+# (or should be). At this point the entries can be too large, the
+# carries will have to be propagated down
+# in the algorithm
+# - the top is never changed, we we will not look at it (again)
+# - the matrix is larger than it seems: U.r is less than actually allocated
+#   so it will be increased...
+# Handle with care.
+#_to_base below will actually convert U, inplace, to the correct ZZMatrix
+function _renorm(U::ZZMatrix, m::ZZRingElem; start::Int = 1, last::Int = nrows(U), R::ZZMatrix = zero_matrix(ZZ, 1, ncols(U)))
+
+  i = start
+  t = ZZRingElem(Val(:raw))
+#  R =  zero_matrix(ZZ, 1, ncols(U))
+  zero!(R)
+  while true
+    R_ptr = Nemo.mat_entry_ptr(R, 1, 1)
+    U_ptr = Nemo.mat_entry_ptr(U, i, 1)
+  
+    for j=1:ncols(U)
+      add!(R_ptr, R_ptr, U_ptr)
+      mod_sym!(U_ptr, R_ptr, m, t)
+      sub!(R_ptr, R_ptr, U_ptr)
+      divexact!(R_ptr, R_ptr, m)
+      R_ptr += sizeof(Clong)
+      U_ptr += sizeof(Clong)
+    end
+    i += 1
+    if i > nrows(U)
+      if i > last || is_zero(R)
+        Nemo._fmpz_clear_fn(t)
+        return U
+      end
+      while !is_zero(R)
+        if i > last
+          Nemo._fmpz_clear_fn(t)
+          return U
+        end
+        U.r += 1
+        @assert U.r <= last
+        U_ptr = Nemo.mat_entry_ptr(U, i, 1)
+        R_ptr = Nemo.mat_entry_ptr(R, 1, 1)
+   
+        for j=1:ncols(U)
+          mod_sym!(U_ptr, R_ptr, m, t)
+          sub!(R_ptr, R_ptr, U_ptr)
+          divexact!(R_ptr, R_ptr, m)
+          R_ptr += sizeof(Clong)
+          U_ptr += sizeof(Clong)
+        end
+        i += 1
+      end
+      return U
+    end
+  end
+end
+
+
+function mod_sym!(a::Ptr{ZZRingElem}, b::Ptr{ZZRingElem}, c::ZZRingElem, t::ZZRingElem = ZZ(0))
+  ccall((:fmpz_ndiv_qr, Nemo.libflint), Cvoid, (Ref{ZZRingElem}, Ptr{ZZRingElem}, Ptr{ZZRingElem}, Ref{ZZRingElem}), t, a, b, c)
+end
+
+#add C into A[c:end, :]
+#add!(view(A, c+1:end, :), C) (but without the view)
+function add_into!(A::ZZMatrix, C::ZZMatrix, c::Int)
+  A.r = max(A.r, C.r+c)
+  for i=1:nrows(C)
+    A_ptr = Nemo.mat_entry_ptr(A, i+c, 1)
+    C_ptr = Nemo.mat_entry_ptr(C, i, 1)
+    for j=1:ncols(A)
+      add!(A_ptr, C_ptr)
+      A_ptr += sizeof(Clong)
+      C_ptr += sizeof(Clong)
+    end
+  end
+end
+
+#sub!(view(A, c+1:end, :), C) (but without the view)
+function sub_into!(A::ZZMatrix, C::ZZMatrix, c::Int)
+  A.r = max(A.r, C.r+c)
+  for i=1:nrows(C)
+    A_ptr = Nemo.mat_entry_ptr(A, i+c, 1)
+    C_ptr = Nemo.mat_entry_ptr(C, i, 1)
+    for j=1:ncols(A)
+      sub!(A_ptr, C_ptr)
+      A_ptr += sizeof(Clong)
+      C_ptr += sizeof(Clong)
+    end
+  end
+end
+
+#saves essentially the allocation of A.rows
+function view!(A::ZZMatrix, B::ZZMatrix, r::UnitRange{Int}, ::Colon)
+  A.r = length(r)
+  A.c = B.c
+  A.entries = B.entries
+  for i=1:A.r
+    unsafe_store!(A.rows, unsafe_load(B.rows, r.start+i-1), i)# + 8*(c.start -1))
+  end
+  A.view_parent = (B, )
+  return A
+end
+
+# "solves" xA = U using the double + 1 method
+#Pauderis and Storjohann, same as uni-cert.
+# the functions returns
+# - a matrix X in ZZ and a denominator d
+#
+#TODO: 
+# - _renorm using preinv (to speed things up)
+# - if |U| >> |A| use the Abbott trick: write U in base ? and solve multiple
+#   systems - otherwise the X below needs to be too large and/ or the
+#   _renorm has to work too much
+# - a special version for odd determinant where X is a power of 2 or better
+#   2^64 to use limbs (thus making modm _renorm and _to_base trivial)
+#   (and ugly to implement)
+#
+
+function UniCertSolve(A::ZZMatrix, U::ZZMatrix)
+  n = nrows(A);
+  @assert ncols(A) == ncols(U) == n
+  @assert maximum(nbits, A) >= maximum(nbits, U)
+
+  GC_d = GC.enable(false) #within this function essentially
+    #nothing can be collected, all(?) operations are julia inplace
+    #and use a lot of memory (fir large examples)
+
+  m, B0, k = Nemo._PSH_init(A)
+  if GC_d
+    GC.enable(true)
+    GC.gc()
+    GC.enable(false)
+  end
+
+  @vprintln(:UnimodVerif,1, "Solve with DoublePlusOne: Max num iters is k=$(k)")
+
+  local pr::Int
+  pr = 2^(k+2)-2
+  allV = ZZMatrix[]
+  for i=1:nrows(U)
+    V = zero_matrix(ZZ, pr+10, ncols(U))
+    V.r = 1
+    push!(allV, V)
+  end
+#  @assert maximum(abs, U) <= m
+
+  R = A*B0
+  sub!(R, R, 1)
+  divexact!(R, m)
+  #R = (I-A*B0)/m
+  
+  tmp_V = zero_matrix(ZZ, max(pr + 10, n), ncols(U))
+  RE_tmp = zero_matrix(ZZ, 1, ncols(U))
+  view_V = view(tmp_V, :, :)
+  view_U = view(U, :, :)
+  view_tmp_V = view(tmp_V, :, :)
+
+  for i=1:nrows(U)
+    V = allV[i]
+    mul!(view!(view_V, V, 1:1, :), view!(view_U, U, i:i, :), B0)
+    V.r = 1
+    _renorm(V, m; last = 10, R = RE_tmp)
+
+    tV = view!(view_tmp_V, tmp_V, 1:nrows(V), :)
+    mul!(tV, V, R)
+    sub_into!(V, tV, 1) #sol mod m^2
+    _renorm(V, m; last = 10, R = RE_tmp)
+  end
+
+  if is_zero(R)
+    mu = vcat([_to_base!(t, m) for t = allV]...)
+    tau = induce_rational_reconstruction(mu, m)
+    @assert tau[1]
+    GC.enable(GC_d)
+    return tau[2], tau[3]
+  end
+  ex = 1
+
+  R_bar = zero_matrix(ZZ, n, n)
+  M = zero_matrix(ZZ, n, n)
+
+  for i in 1:k
+    @vprintln(:UnimodVerif, 1,  "DoublePlusOne: loop: i=$(i)")
+    #@time R_bar = R^2;
+    mul!(R_bar, R, R)
+    vT = view!(view_tmp_V, tmp_V, 1:n, :)
+    mod_sym!(vT, R_bar, m)
+    mul!(M, B0, vT)
+    mod_sym!(M, m)
+    mul!(R, A, M)
+    sub!(R, R_bar, R)
+    divexact!(R, m)
+
+    for j=1:nrows(U)
+      V =allV[j]
+
+      vT = view!(view_tmp_V, tmp_V, 1:1, :)
+      mul!(vT, view!(view_U, U, j:j, :), M)
+      add_into!(V, vT, 2*ex)  #sol mod 2ex+1
+      _renorm(V, m, start = 2*ex, last = pr, R = RE_tmp)
+    end
+
+    ex = 2*ex + 1
+
+    for jj = 1:nrows(U)
+      V = allV[jj]
+
+      if nrows(V) > nrows(R) #this part should be automatic...
+        n = nrows(R)
+        l = nrows(V)
+        e = div(l, n)
+        vT = view(tmp_V, 1:l-e*n, :)
+        mul!(vT, view(V, e*n+1:l, :), R)
+        add_into!(V, vT, ex+e*n)
+        vT = view(tmp_V, 1:n, :)
+        for j=e:-1:1
+          @assert j*n <= pr
+          mul!(vT, view(V, (j-1)*n+1:j*n, :), R)
+          add_into!(V, vT, ex+(j-1)*n)
+        end
+      else
+        vT = view!(view_tmp_V, tmp_V, 1:nrows(V), :)
+        mul!(vT, V, R)
+        add_into!(V, vT, ex) #sol mod 2(2ex+1)
+      end
+      _renorm(V, m, start = ex-1, last = pr, R = RE_tmp)
+      @assert V === allV[jj]
+    end
+
+    mex = m^(2*ex)
+    mu = vcat([_to_base!(t[:, 1:1], m) for t = allV]...)
+    tau = induce_rational_reconstruction(mu, mex)
+    if tau[1]
+      GC.enable(true)
+      mu = vcat([_to_base!(deepcopy(t), m) for t = allV]...)
+      tau = induce_rational_reconstruction(mu, mex)
+      if tau[1]
+        GC.enable(GC_d)
+        return mu, m
+      end
+      GC.gc()
+      GC.enable(false)
+    end
+  end
+  mu = vcat([_to_base!(t, m) for t = allV]...)
+  tau = induce_rational_reconstruction(mu, mex)
+  @assert tau[1]
+  GC.enable(GC_d)
+  return tau[2], tau[3]
+end
+
+#inplace conversion
+#the rows of a are encoding a base-b row vector
+#this computes asymptotically fast(?) the ZZ-version
+function _to_base!(a::ZZMatrix, _b::ZZRingElem)
+  nr = nrows(a)
+  bb = ZZRingElem(Val(:raw))
+  b = ZZRingElem(Val(:raw))
+  set!(b, _b)
+  while nr > 1
+    mul!(bb, b, b)
+    for i=1:div(nr, 2)
+      add_row!(a, b, 2*i-1, 2*i)
+      swap_rows!(a, i, 2*i-1)
+      zero_row!(a, 2*i) #to release memory early
+    end
+    if is_odd(nr)
+      add_row!(a, bb, div(nr, 2), nr)
+      zero_row!(a, nr)
+    end
+    nr = div(nr, 2)
+    b.d, bb.d = bb.d, b.d
+  end
+  Nemo._fmpz_clear_fn(bb)
+  Nemo._fmpz_clear_fn(b)
+  #all rows, but the 1st have been set to zero! - or have never been
+  #used. So no memory is lost...
+  a.r = 1
+  return a
+end
+
diff --git a/src/flint/fmpz.jl b/src/flint/fmpz.jl
index 2c359921a..a6395bad6 100644
--- a/src/flint/fmpz.jl
+++ b/src/flint/fmpz.jl
@@ -2618,6 +2618,14 @@ end
 
 #
 
+function tdiv_q!(a::ZZRingElem, b::ZZRingElem, c::ZZRingElem)
+  ccall((:fmpz_tdiv_q, Nemo.libflint), Cvoid, (Ref{ZZRingElem}, Ref{ZZRingElem}, Ref{ZZRingElem}), a, b, c)
+end
+
+function shift_right!(a::ZZRingElem, b::ZZRingElem, i::Int)
+  @ccall Nemo.libflint.fmpz_fdiv_q_2exp(a::Ref{ZZRingElem}, b::Ref{ZZRingElem}, i::Int)::Nothing
+end
+
 function pow!(z::ZZRingElemOrPtr, a::ZZRingElemOrPtr, b::Integer)
   @ccall libflint.fmpz_pow_ui(z::Ref{ZZRingElem}, a::Ref{ZZRingElem}, UInt(b)::UInt)::Nothing
   return z
@@ -3225,3 +3233,5 @@ function resultant(f::ZZPolyRingElem, g::ZZPolyRingElem, d::ZZRingElem, nb::Int)
   @ccall libflint.fmpz_poly_resultant_modular_div(z::Ref{ZZRingElem}, f::Ref{ZZPolyRingElem}, g::Ref{ZZPolyRingElem}, d::Ref{ZZRingElem}, nb::Int)::Nothing
   return z
 end
+
+
diff --git a/src/flint/fmpz_mat.jl b/src/flint/fmpz_mat.jl
index 0b64238af..9db2a3495 100644
--- a/src/flint/fmpz_mat.jl
+++ b/src/flint/fmpz_mat.jl
@@ -1803,11 +1803,24 @@ end
 #
 ###############################################################################
 
+function Base.copy!(A::ZZMatrix, B::ZZMatrix)
+  ccall((:fmpz_mat_set, Nemo.libflint), Cvoid, (Ref{ZZMatrix}, Ref{ZZMatrix}), A, B)
+end
+
 function zero!(z::ZZMatrixOrPtr)
   @ccall libflint.fmpz_mat_zero(z::Ref{ZZMatrix})::Nothing
   return z
 end
 
+function zero_row!(z::ZZMatrix, i::Int)
+  z_ptr = mat_entry_ptr(z, i, 1)
+  for i=1:ncols(z)
+    zero!(z_ptr)
+    z_ptr += sizeof(Int)
+  end
+  return z
+end
+
 function one!(z::ZZMatrixOrPtr)
   @ccall libflint.fmpz_mat_one(z::Ref{ZZMatrix})::Nothing
   return z
@@ -1828,6 +1841,15 @@ function sub!(z::ZZMatrixOrPtr, x::ZZMatrixOrPtr, y::ZZMatrixOrPtr)
   return z
 end
 
+function sub!(A::ZZMatrix, B::ZZMatrix, m::Int)
+  for i=1:nrows(A)
+    A_p = Nemo.mat_entry_ptr(A, i, i)
+    B_p = Nemo.mat_entry_ptr(B, i, i)
+    sub!(A_p, B_p, m)
+  end
+  return A
+end
+
 function mul!(z::ZZMatrixOrPtr, x::ZZMatrixOrPtr, y::ZZMatrixOrPtr)
   @ccall libflint.fmpz_mat_mul(z::Ref{ZZMatrix}, x::Ref{ZZMatrix}, y::Ref{ZZMatrix})::Nothing
   return z
@@ -2151,3 +2173,4 @@ end
 ################################################################################
 
 mat_entry_ptr(A::ZZMatrix, i::Int, j::Int) = unsafe_load(A.rows, i) + (j-1)*sizeof(ZZRingElem)
+
diff --git a/src/matrix.jl b/src/matrix.jl
index e39d832be..ef06b1c81 100644
--- a/src/matrix.jl
+++ b/src/matrix.jl
@@ -395,298 +395,3 @@ function reduce_mod(A::MatElem{T}, B::MatElem{T}) where {T<:FieldElem}
   return C
 end
 
-
-##################################################################
-## Functions related to unimodularity verification
-##################################################################
-
-# ***SOURCE ALGORITHM***
-# Refined implementation of method from
-# Authors: C. Pauderis + A. Storjohann
-# Title:   Detrministic Unimodularity Certification
-# Where:   Proc ISSAC 2012, pp. 281--287
-# See also: thesis of Colton Pauderis, 2013, Univ of Waterloo
-#           Deterministic Unimodularity Certification and Applications for Integer Matrices
-
-
-# add_verbosity_scope(:UnimodVerif) -- see func __init__ in src/Nemo.jl
-
-#######################################################
-# Low-level auxiliary functions -- should be elsewhere?
-#######################################################
-
-function _det(a::fpMatrix)
-  a.r < 10 && return lift(det(a))
-  #_det avoids a copy: det is computed destructively
-  r = ccall((:_nmod_mat_det, Nemo.libflint), UInt, (Ref{fpMatrix}, ), a)
-  return r
-end
-
-function map_entries!(k::Nemo.fpField, a::fpMatrix, A::ZZMatrix)
-  ccall((:nmod_mat_set_mod, Nemo.libflint), Cvoid, (Ref{fpMatrix}, UInt), a, k.n)
-  ccall((:fmpz_mat_get_nmod_mat, Nemo.libflint), Cvoid, (Ref{fpMatrix}, Ref{ZZMatrix}), a, A)
-  a.base_ring = k  # exploiting that the internal repr is the indep of char
-  return a
-end
-
-function Base.copy!(A::ZZMatrix, B::ZZMatrix)
-  ccall((:fmpz_mat_set, Nemo.libflint), Cvoid, (Ref{ZZMatrix}, Ref{ZZMatrix}), A, B)
-end
-
-function sub!(A::ZZMatrix, B::ZZMatrix, m::Int)
-  for i=1:nrows(A)
-    A_p = Nemo.mat_entry_ptr(A, i, i)
-    B_p = Nemo.mat_entry_ptr(B, i, i)
-    sub!(A_p, B_p, m)
-  end
-  return A
-end
-
-@doc raw"""
-    is_unimodular(A::ZZMatrix)
-    is_unimodular(A::ZZMatrix; algorithm=:auto)
-
-Determine whether `A` is unimodular, i.e. whether `abs(det(A)) == 1`.
-The method used is either that of Pauderis--Storjohann or using CRT;
-the choice is made based on cost estimates for the two approaches.
-
-The kwarg `algorithm` may be specified to be `:CRT` or `:pauderis_storjohann`
-to ensure that the corresponding underlying algorithm is used (after a quick
-initial unimodularity check).  `:CRT` is better if the matrix is unimodular
-and has an inverse containing large entries (or if it is not unimodular);
-`:pauderis_storjohann` is asymptotically faster when the matrix is unimodular
-and its inverse does not contain large entries, but it requires more space
-for intermediate expressions.
-"""
-function is_unimodular(A::ZZMatrix; algorithm=:auto)
-  # Call this function when no extra info about the matrix is available.
-  # It does a preliminary check that det(A) is +/-1 modulo roughly 2^100.
-  # If so, then delegate the complete check to is_unimodular_given_det_mod_m
-  if !is_square(A)
-    throw(ArgumentError("Matrix must be square"))
-  end
-  if !(algorithm in [:auto, :CRT, :pauderis_storjohann])
-    throw(ArgumentError("algorithm must be one of [:CRT, :pauderis_storjohann, :auto]"))
-  end
-  # Deal with two trivial cases
-  if nrows(A) == 0
-    return true
-  end
-  if nrows(A) == 1
-    return (abs(A[1,1]) == 1);
-  end
-  # Compute det mod M -- may later include some more primes
-  target_bitsize_modulus = 100 # magic number -- should not be too small!
-  @vprintln(:UnimodVerif,1,"Quick first check: compute det by crt up to modulus with about $(target_bitsize_modulus) bits")
-  p::Int = 2^20 # start point of primes for initial CRT trial -- det with modulus >= 2^22 is much slower
-  M = ZZ(1)
-  det_mod_m = 0  # 0 means uninitialized, o/w value is 1 or -1
-  A_mod_p = identity_matrix(Nemo.Native.GF(2), nrows(A))
-  while nbits(M) <= target_bitsize_modulus
-    @vprint(:UnimodVerif,2,".")
-    p = next_prime(p + rand(1:2^10), false) # increment by a random step to a prime
-    ZZmodP = Nemo.Native.GF(p; cached = false, check = false)
-    map_entries!(ZZmodP, A_mod_p, A)
-    @vtime :UnimodVerif 2  det_mod_p = Int(_det(A_mod_p))
-    p2 = p>>1
-    if det_mod_p > p2
-      det_mod_p -= p
-    end
-    if abs(det_mod_p) != 1
-      return false  # obviously not unimodular
-    end
-    if det_mod_m != 0 && det_mod_m != det_mod_p
-      return false  # obviously not unimodular
-    end
-    det_mod_m = det_mod_p
-    mul!(M, M, p)
-  end
-  return _is_unimodular_given_det_mod_m(A, det_mod_m, M; algorithm)
-end #function
-
-
-function _is_unimodular_given_det_mod_m(A::ZZMatrix, det_mod_m::Int, M::ZZRingElem; algorithm=:auto)
-  # This UI is convenient for our sophisticated determinant algorithm.
-  # We estimate cost of computing det by CRT and cost of doing Pauderis-Storjohann
-  # unimodular verification then call whichever is likely to be faster
-  # (but we avoid P-S if the space estimate is too large).
-  # M is a modulus satisfying M > 2^30;
-  # det_mod_m is det(A) mod M [!not checked!]: it is either +1 or -1.
-  is_square(A) || throw(ArgumentError("Matrix must be square"))
-  abs(det_mod_m) == 1 || throw(ArgumentError("det_mod_m must be +1 or -1"))
-  M >  2^30 || throw(ArgumentError("modulus must be greater than 2^30"))
-  (algorithm in [:auto, :CRT, :pauderis_storjohann]) || throw(ArgumentError("algorithm must be one of [:CRT, :pauderis_storjohann, :auto]"))
-  # Deal with two trivial cases -- does this make sense here???
-  nrows(A) == 0 && return true
-  nrows(A) == 1 && return (abs(A[1,1]) == 1)
-  @vprintln(:UnimodVerif,1,"is_unimodular_given_det_mod_m starting")
-  if algorithm == :pauderis_storjohann
-    @vprintln(:UnimodVerif,1,"User specified Pauderis_Storjohann --> delegating")
-    return _is_unimodular_Pauderis_Storjohann_Hensel(A)
-  end
-  n = ncols(A)
-  Hrow = hadamard_bound2(A)
-  Hcol = hadamard_bound2(transpose(A))
-  DetBoundSq = min(Hrow, Hcol)
-  Hbits = 1+div(nbits(DetBoundSq),2)
-  @vprintln(:UnimodVerif,1,"Hadamard bound in bits $(Hbits)")
-  if algorithm == :auto
-    # Estimate whether better to "climb out" with CRT or use Pauderis-Storjohann
-    CRT_speed_factor = 20.0  # scale factor is JUST A GUESS !!!  (how much faster CRT is compared to P-S)
-    total_entry_size = sum(nbits,A)
-    Estimate_CRT = ceil(((Hbits - nbits(M))*(2.5+total_entry_size/n^3))/CRT_speed_factor)  # total_entry_size/n is proportional to cost of reducing entries mod p
-    EntrySize = maximum(abs, A)
-    entry_size_bits = maximum(nbits, A)
-    @vprintln(:UnimodVerif,1,"entry_size_bits = $(entry_size_bits)   log2(EntrySize) = $(ceil(log2(EntrySize)))")
-    e = max(16, Int(2+ceil(2*log2(n)+log2(EntrySize))))  # see Condition in Fig 1, p.284 of Pauderis-Storjohann
-    Estimate_PS = ceil(e*log(e)) # assumes soft-linear ZZ arith
-    @vprintln(:UnimodVerif,1,"Est_CRT = $(Estimate_CRT)  and  Est_PS = $(Estimate_PS)")
-    Space_estimate_PS = ZZ(n)^2 * e  # nbits of values, ignoring mem mgt overhead
-    @vprintln(:UnimodVerif,1,"Space est PS: $(Space_estimate_PS)  [might be bytes]")
-    if Estimate_PS < Estimate_CRT #=&& Space_estimate_PS < 2^32=#
-      # Expected RAM requirement is not excessive, and
-      # Pauderis-Storjohann is likely faster, so delegate to UniCertZ_hensel
-      return _is_unimodular_Pauderis_Storjohann_Hensel(A)
-    end
-  end
-  if algorithm == :CRT
-    @vprintln(:UnimodVerif,1,"User specified CRT --> delegating")
-  end
-  @vprintln(:UnimodVerif,1,"UnimodularVerification: CRT loop")
-  # Climb out with CRT:
-  # in CRT loop start with 22 bit primes; if we reach threshold (empirical), jump to much larger primes.
-  p = 2^21; stride = 2^10; threshold = 5000000;
-  A_mod_p = zero_matrix(Nemo.Native.GF(2), nrows(A), ncols(A))
-  while nbits(M) < Hbits
-    @vprint(:UnimodVerif,2,".")
-    # Next lines increment p (by random step up to stride) to a prime not dividing M
-    if p > threshold && p < threshold+stride
-      @vprint(:UnimodVerif,2,"!!")
-      p = 2^56; stride = 2^25;  # p = 2^56 is a good choice on my standard 64-bit machine
-      continue
-    end
-    # advance to another prime which does not divide M:
-    while true
-      p = next_prime(p+rand(1:stride), false)
-      if !is_divisible_by(M,p)
-        break
-      end
-    end
-    ZZmodP = Nemo.Native.GF(p; cached = false, check = false)
-    map_entries!(ZZmodP, A_mod_p, A)
-    det_mod_p = _det(A_mod_p)
-    if det_mod_p > p/2
-      det_mod_p -= p
-    end
-    if abs(det_mod_p) != 1 || det_mod_m != det_mod_p
-      return false  # obviously not unimodular
-    end
-    mul!(M, M, p)
-  end
-  return true
-end
-
-#DoublePlusOne complex ##############################
-
-#TODO: Hensel is slower than CRT if modulus is small(ish)
-
-function _PSH_init(A::ZZMatrix)
-  #computes a 
-  # modulus (X) in the paper
-  # the inverse of A mod x
-  # the exponent target for the lifting.
-
-  n = nrows(A)
-  @assert ncols(A) == n
-
-  EntrySize = maximum(abs, A)
-  entry_size_bits = maximum(nbits, A)
-  e = max(16, 2+ceil(Int, 2*log2(n)+entry_size_bits))
-  ModulusLWB = ZZ(2)^e
-  @vprintln(:UnimodVerif,1,"ModulusLWB is 2^$(e)")
-  # Now look for prime: about 25 bits, and such that p^(2^sthg) is
-  # just slightly bigger than ModulusLWB.  Bit-size of prime matters little.
-  root_order = exp2(ceil(log2(e/25)))
-  j = ceil(exp2(e/root_order))
-  p = next_prime(Int(j))
-  @vprintln(:UnimodVerif,1,"Hensel prime is p = $(p)")
-  m = ZZ(p)
-  ZZmodP = Nemo.Native.GF(p)
-  B0 = lift(inv(map_entries(ZZmodP, A))) #produces pos. lift
-  mod_sym!(B0, m)
-  delta = identity_matrix(base_ring(A), n)
-  A_mod_m = identity_matrix(base_ring(A), n)
-  @vprintln(:UnimodVerif,1,"Starting stage 1 lifting")
-  while (m < ModulusLWB)
-    @vprintln(:UnimodVerif,2,"Loop 1: nbits(m) = $(nbits(m))");
-    copy!(A_mod_m, A)
-    mm = m^2
-    mod_sym!(A_mod_m, mm)
-    @vtime :UnimodVerif 2  mul!(delta, A_mod_m, B0)
-    sub!(delta, delta, 1)
-    divexact!(delta, m) # to make matrix product in line below cheaper
-    @vtime :UnimodVerif 2 mul!(A_mod_m, B0, delta)
-    mul!(A_mod_m, A_mod_m, m)
-    sub!(B0, B0, A_mod_m)
-    m = mm
-    mod_sym!(B0, m)
-  end
-  @vprintln(:UnimodVerif,1,"Got stage 1 modular inverse: modulus has $(nbits(m)) bits; value is $(p)^$(Int(round(log2(m)/log2(p))))")
-
-  # Hadamard bound brings little benefit; anyway, almost never lift all the way
-###  H = min(hadamard_bound2(A), hadamard_bound2(transpose(A)))
-###  println("nbits(H) = $(nbits(H))");
-  # Compute max num iters in k
-  k = (n-1)*(entry_size_bits+log2(n)/2) - (2*log2(n) + entry_size_bits -1)
-  k = 2+nbits(ceil(Int,k/log2(m)))
-  @vprintln(:UnimodVerif,1,"Stage 2: max num iters is k=$(k)")
-
-  return m, B0, k
-end
-
-
-# THIS FUNCTION NOT OFFICIALLY DOCUMENTED -- not intended for public use.
-# Pauderis-Storjohann unimodular verification/certification.
-# We use Hensel lifting to obtain the modular inverse B0
-# Seems to be faster than the CRT prototype (not incl. here)
-# VERBOSITY via :UnimodVerif
-function _is_unimodular_Pauderis_Storjohann_Hensel(A::ZZMatrix)
-  n = nrows(A)
-  m, B0, k = _PSH_init(A)
-
-  ##  @assert is_one(lift(MatModM(B0*A)))
-  # Originally: R = (I-A*B0)/m
-  R = A*B0
-  sub!(R, R, 1) #this is -R, but is_zero test is the same, and the loop starts by squaring
-  divexact!(R, m)
-  if is_zero(R)
-    return true
-  end
-
-  #ZZMatrix and ZZModMatrix are the same type. The modulus is
-  #always passed in as an extra argument when needed...
-  #mul! for ZZModMatrix is mul, followed by reduce.
-  #thus the code is not using ZZModMatrices at all...
-
-  R_bar = zero_matrix(ZZ, n, n)
-  M = zero_matrix(ZZ, n, n)
-
-  @vprintln(:UnimodVerif,1,"Starting stage 2 lifting")
-  for i in 0:k-1
-    @vprintln(:UnimodVerif,1,"Stage 2 loop: i=$(i)")
-
-    mul!(R_bar, R, R)
-    #Next 3 lines do:  M = lift(B0_modm*MatModM(R_bar));
-    mod_sym!(R_bar, m)
-    mul!(M, B0, R_bar)
-    mod_sym!(M, m)
-    # Next 3 lines do: R = (R_bar - A*M)/m; but with less memory allocation
-    mul!(R, A, M)
-    sub!(R, R_bar, R)
-    divexact!(R, m)
-    if is_zero(R)
-      return true
-    end
-  end
-  return false
-end
diff --git a/test/Nemo-test.jl b/test/Nemo-test.jl
index 8e03ff876..bead4a11a 100644
--- a/test/Nemo-test.jl
+++ b/test/Nemo-test.jl
@@ -8,3 +8,5 @@ include("Infinity-test.jl")
 include("HeckeMiscLocalization-test.jl")
 include("matrix-test.jl")
 include("poly-test.jl")
+include("ZZMatrix-linalg-test.jl")
+
diff --git a/test/ZZMatrix-linalg-test.jl b/test/ZZMatrix-linalg-test.jl
new file mode 100644
index 000000000..d8d0a6af4
--- /dev/null
+++ b/test/ZZMatrix-linalg-test.jl
@@ -0,0 +1,30 @@
+@testset "dixon solver" begin
+  A = matrix(ZZ, 4, 4, [1, 2, 7, 4, 15, 6, 7, 9, 19, 10, 11, 11, 53, 14, 15, 16])
+  b = matrix(ZZ, 4, 1, [-1, -4, 5, 7])
+  bb = matrix(ZZ, 4, 2, [-1, -2, -3, 5, 11, 2, 13, 17])
+  for i = [1,5,10]
+    s, d = Nemo.dixon_solve(A^i, b)
+    @test A^i*s == d*b
+    s, d = Nemo.dixon_solve(A^i, transpose(b)); side = :left
+    @test s*A^i == d*transpose(b)
+
+    s, d = Nemo.dixon_solve(A^i, bb)
+    @test A^i*s == d*bb
+    s, d = Nemo.dixon_solve(A^i, transpose(bb)); side = :left
+    @test s*A^i == d*transpose(bb)
+  end
+end
+
+@testset "DoublePlusOne" begin
+  A = matrix(ZZ, 4, 4, [1, 2, 7, 4, 15, 6, 7, 9, 19, 10, 11, 11, 53, 14, 15, 16])
+  b = matrix(ZZ, 1, 4, [-1, -4, 5, 7])
+  bb = matrix(ZZ, 2, 4, [-1, -2, -3, 5, 11, 2, 13, 17])
+  for i = [1,5,10]
+    s, d = Nemo.UniCertSolve(A^i, b)
+    @test s*A^i == d*b
+
+    s, d = Nemo.UniCertSolve(A^i, bb)
+    @test s*A^i == d*bb
+  end
+end
+

From d0470ce439e24a622e0d06bfeaea00ff49691329 Mon Sep 17 00:00:00 2001
From: Claus Fieker <fieker@mathematik.uni-kl.de>
Date: Fri, 7 Feb 2025 15:32:07 +0100
Subject: [PATCH 03/11] fix dims

---
 src/ZZMatrix-linalg.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ZZMatrix-linalg.jl b/src/ZZMatrix-linalg.jl
index 3ed7444bf..1003d1130 100644
--- a/src/ZZMatrix-linalg.jl
+++ b/src/ZZMatrix-linalg.jl
@@ -384,9 +384,9 @@ end
 function dixon_init!(D::DixonCtx, A::ZZMatrix, B::ZZMatrix; side::Symbol = :right)
 
   if side == :right
-    @assert nrows(B) == ncols(A)
+    @assert nrows(B) == nrows(A)
   else
-    @assert ncols(B) == nrows(A)
+    @assert ncols(B) == ncols(A)
   end
 
   if isdefined(D, :A)

From be7d0dceaef25f0edc98c5cef0afc475453bca14 Mon Sep 17 00:00:00 2001
From: Claus Fieker <fieker@mathematik.uni-kl.de>
Date: Fri, 7 Feb 2025 15:58:51 +0100
Subject: [PATCH 04/11] cool typo

---
 test/ZZMatrix-linalg-test.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/ZZMatrix-linalg-test.jl b/test/ZZMatrix-linalg-test.jl
index d8d0a6af4..03e23829e 100644
--- a/test/ZZMatrix-linalg-test.jl
+++ b/test/ZZMatrix-linalg-test.jl
@@ -5,12 +5,12 @@
   for i = [1,5,10]
     s, d = Nemo.dixon_solve(A^i, b)
     @test A^i*s == d*b
-    s, d = Nemo.dixon_solve(A^i, transpose(b)); side = :left
+    s, d = Nemo.dixon_solve(A^i, transpose(b); side = :left)
     @test s*A^i == d*transpose(b)
 
     s, d = Nemo.dixon_solve(A^i, bb)
     @test A^i*s == d*bb
-    s, d = Nemo.dixon_solve(A^i, transpose(bb)); side = :left
+    s, d = Nemo.dixon_solve(A^i, transpose(bb); side = :left)
     @test s*A^i == d*transpose(bb)
   end
 end

From 6bc9c2c8ec708dce53105f99733fd4633ab5beae Mon Sep 17 00:00:00 2001
From: Claus Fieker <fieker@mathematik.uni-kl.de>
Date: Fri, 7 Feb 2025 16:18:44 +0100
Subject: [PATCH 05/11] transposing and using it!

---
 src/ZZMatrix-linalg.jl | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/ZZMatrix-linalg.jl b/src/ZZMatrix-linalg.jl
index 1003d1130..2079c7e3b 100644
--- a/src/ZZMatrix-linalg.jl
+++ b/src/ZZMatrix-linalg.jl
@@ -383,9 +383,11 @@ end
 
 function dixon_init!(D::DixonCtx, A::ZZMatrix, B::ZZMatrix; side::Symbol = :right)
 
+  sB = size(B)
   if side == :right
     @assert nrows(B) == nrows(A)
   else
+    sB = (sB[2], sB[1])
     @assert ncols(B) == ncols(A)
   end
 
@@ -394,7 +396,7 @@ function dixon_init!(D::DixonCtx, A::ZZMatrix, B::ZZMatrix; side::Symbol = :righ
   end
 
   if isdefined(D, :x)
-    @assert size(D.x) == size(B)
+    @assert size(D.x) == sB
   end
 
   if side == :right
@@ -460,7 +462,7 @@ function dixon_init!(D::DixonCtx, A::ZZMatrix, B::ZZMatrix; side::Symbol = :righ
 
   while true
     push!(D.crt_primes, xp)
-    push!(D.A_mod, map_entries(Nemo.fpField(UInt(xp), false), A))
+    push!(D.A_mod, map_entries(Nemo.fpField(UInt(xp), false), D.A))
     mul!(pr, pr, xp)
     if pr > maxA
       break
@@ -590,6 +592,8 @@ function dixon_solve(D::DixonCtx, B::ZZMatrix; side::Symbol = :right, block::Int
     end
     divexact!(d, d, ZZ(D.p))
   end
+  @show _t = D.A*D.x - _B
+  @show map(x-> is_zero(x) ? -1 : valuation(x, D.p), _t)
   fl, num, den = induce_rational_reconstruction(D.x, ppow)
   @assert fl
 

From a79df589fd37a12ee6babe743dfec9556bc1f4d6 Mon Sep 17 00:00:00 2001
From: Claus Fieker <fieker@mathematik.uni-kl.de>
Date: Fri, 7 Feb 2025 16:28:11 +0100
Subject: [PATCH 06/11] remove printing

---
 src/ZZMatrix-linalg.jl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/ZZMatrix-linalg.jl b/src/ZZMatrix-linalg.jl
index 2079c7e3b..2e4d78686 100644
--- a/src/ZZMatrix-linalg.jl
+++ b/src/ZZMatrix-linalg.jl
@@ -592,8 +592,6 @@ function dixon_solve(D::DixonCtx, B::ZZMatrix; side::Symbol = :right, block::Int
     end
     divexact!(d, d, ZZ(D.p))
   end
-  @show _t = D.A*D.x - _B
-  @show map(x-> is_zero(x) ? -1 : valuation(x, D.p), _t)
   fl, num, den = induce_rational_reconstruction(D.x, ppow)
   @assert fl
 

From fa42363bbcae705ee923cfb19937aa9593ef6e5c Mon Sep 17 00:00:00 2001
From: Claus Fieker <fieker@mathematik.uni-kl.de>
Date: Fri, 7 Feb 2025 16:55:58 +0100
Subject: [PATCH 07/11] remove debugging

---
 src/ZZMatrix-linalg.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ZZMatrix-linalg.jl b/src/ZZMatrix-linalg.jl
index 2e4d78686..1fb676d25 100644
--- a/src/ZZMatrix-linalg.jl
+++ b/src/ZZMatrix-linalg.jl
@@ -935,7 +935,7 @@ function UniCertSolve(A::ZZMatrix, U::ZZMatrix)
       tau = induce_rational_reconstruction(mu, mex)
       if tau[1]
         GC.enable(GC_d)
-        return mu, m
+        return tau[2], tau[3]
       end
       GC.gc()
       GC.enable(false)

From fe9d5f4ac19c75be55069ad562d68887412a9d34 Mon Sep 17 00:00:00 2001
From: Claus Fieker <fieker@mathematik.uni-kl.de>
Date: Mon, 10 Feb 2025 09:20:39 +0100
Subject: [PATCH 08/11] make windows happy

---
 src/ZZMatrix-linalg.jl | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/ZZMatrix-linalg.jl b/src/ZZMatrix-linalg.jl
index 1fb676d25..8bab8ba3a 100644
--- a/src/ZZMatrix-linalg.jl
+++ b/src/ZZMatrix-linalg.jl
@@ -721,8 +721,8 @@ function _renorm(U::ZZMatrix, m::ZZRingElem; start::Int = 1, last::Int = nrows(U
       mod_sym!(U_ptr, R_ptr, m, t)
       sub!(R_ptr, R_ptr, U_ptr)
       divexact!(R_ptr, R_ptr, m)
-      R_ptr += sizeof(Clong)
-      U_ptr += sizeof(Clong)
+      R_ptr += sizeof(Int)
+      U_ptr += sizeof(Int)
     end
     i += 1
     if i > nrows(U)
@@ -744,8 +744,8 @@ function _renorm(U::ZZMatrix, m::ZZRingElem; start::Int = 1, last::Int = nrows(U
           mod_sym!(U_ptr, R_ptr, m, t)
           sub!(R_ptr, R_ptr, U_ptr)
           divexact!(R_ptr, R_ptr, m)
-          R_ptr += sizeof(Clong)
-          U_ptr += sizeof(Clong)
+          R_ptr += sizeof(Int)
+          U_ptr += sizeof(Int)
         end
         i += 1
       end
@@ -768,8 +768,8 @@ function add_into!(A::ZZMatrix, C::ZZMatrix, c::Int)
     C_ptr = Nemo.mat_entry_ptr(C, i, 1)
     for j=1:ncols(A)
       add!(A_ptr, C_ptr)
-      A_ptr += sizeof(Clong)
-      C_ptr += sizeof(Clong)
+      A_ptr += sizeof(Int)
+      C_ptr += sizeof(Int)
     end
   end
 end
@@ -782,8 +782,8 @@ function sub_into!(A::ZZMatrix, C::ZZMatrix, c::Int)
     C_ptr = Nemo.mat_entry_ptr(C, i, 1)
     for j=1:ncols(A)
       sub!(A_ptr, C_ptr)
-      A_ptr += sizeof(Clong)
-      C_ptr += sizeof(Clong)
+      A_ptr += sizeof(Int)
+      C_ptr += sizeof(Int)
     end
   end
 end

From 89e2a3a2d8443c08782ab8cbe7f2ed62d1494ac5 Mon Sep 17 00:00:00 2001
From: Claus Fieker <fieker@mathematik.uni-kl.de>
Date: Mon, 10 Feb 2025 09:57:39 +0100
Subject: [PATCH 09/11] add a few @preserve

---
 src/ZZMatrix-linalg.jl | 154 ++++++++++++++++++++++-------------------
 src/flint/fmpz_mat.jl  |  20 +++---
 2 files changed, 94 insertions(+), 80 deletions(-)

diff --git a/src/ZZMatrix-linalg.jl b/src/ZZMatrix-linalg.jl
index 8bab8ba3a..32c6da90b 100644
--- a/src/ZZMatrix-linalg.jl
+++ b/src/ZZMatrix-linalg.jl
@@ -291,23 +291,25 @@ function induce_rational_reconstruction(a::ZZMatrix, b::ZZRingElem)
   bN = ZZRingElem(Val(:raw))
   bD = ZZRingElem(Val(:raw))
   D = ZZRingElem(1)
-  for i=1:nrows(a)
-    a_ptr = Nemo.mat_entry_ptr(a, i, 1)
-    A_ptr = Nemo.mat_entry_ptr(A, i, 1)
-    for j=1:ncols(a)
-      Nemo.set!(T, a_ptr)
-      Nemo.mul!(T, T, D)
-      Nemo.mod!(T, T, b)
-      fl = ratrec!(n, d, T, b, bN, bD)
-      fl || return fl, A, D
-      if !isone(d)
-        mul!(D, D, d)
-        Nemo.mul!(A, A, d)
-      end
-      Nemo.set!(A_ptr, n)
+  GC.@preserve a A begin
+    for i=1:nrows(a)
+      a_ptr = Nemo.mat_entry_ptr(a, i, 1)
+      A_ptr = Nemo.mat_entry_ptr(A, i, 1)
+      for j=1:ncols(a)
+        Nemo.set!(T, a_ptr)
+        Nemo.mul!(T, T, D)
+        Nemo.mod!(T, T, b)
+        fl = ratrec!(n, d, T, b, bN, bD)
+        fl || return fl, A, D
+        if !isone(d)
+          mul!(D, D, d)
+          Nemo.mul!(A, A, d)
+        end
+        Nemo.set!(A_ptr, n)
 
-      a_ptr += sizeof(ZZRingElem)
-      A_ptr += sizeof(ZZRingElem)
+        a_ptr += sizeof(ZZRingElem)
+        A_ptr += sizeof(ZZRingElem)
+      end
     end
   end
   Nemo._fmpz_clear_fn(bN)
@@ -565,17 +567,19 @@ function dixon_solve(D::DixonCtx, B::ZZMatrix; side::Symbol = :right, block::Int
 
     if ncols(_B) == 1
       n = nrows(D.A)
-      for i=1:n
-        Ay_ptr = mat_entry_ptr(D.Ay, i, 1)
-        A_ptr = mat_entry_ptr(D.A, i, 1)
-        d_ptr = mat_entry_ptr(d, i, 1)
-        zero!(Ay_ptr)
-        for j=1:n
-          y_ptr = Nemo.mat_entry_ptr(D.y_mod, j, 1)
-          addmul!(Ay_ptr, A_ptr, unsafe_load(y_ptr))
-          A_ptr += sizeof(ZZRingElem)
+      GC.@preserve D d begin 
+        for i=1:n
+          Ay_ptr = mat_entry_ptr(D.Ay, i, 1)
+          A_ptr = mat_entry_ptr(D.A, i, 1)
+          d_ptr = mat_entry_ptr(d, i, 1)
+          zero!(Ay_ptr)
+          for j=1:n
+            y_ptr = Nemo.mat_entry_ptr(D.y_mod, j, 1)
+            addmul!(Ay_ptr, A_ptr, unsafe_load(y_ptr))
+            A_ptr += sizeof(ZZRingElem)
+          end
+          sub!(d_ptr, d_ptr, Ay_ptr)
         end
-        sub!(d_ptr, d_ptr, Ay_ptr)
       end
     else
       C = CrtCtx_Mat(block)
@@ -712,44 +716,46 @@ function _renorm(U::ZZMatrix, m::ZZRingElem; start::Int = 1, last::Int = nrows(U
   t = ZZRingElem(Val(:raw))
 #  R =  zero_matrix(ZZ, 1, ncols(U))
   zero!(R)
-  while true
-    R_ptr = Nemo.mat_entry_ptr(R, 1, 1)
-    U_ptr = Nemo.mat_entry_ptr(U, i, 1)
-  
-    for j=1:ncols(U)
-      add!(R_ptr, R_ptr, U_ptr)
-      mod_sym!(U_ptr, R_ptr, m, t)
-      sub!(R_ptr, R_ptr, U_ptr)
-      divexact!(R_ptr, R_ptr, m)
-      R_ptr += sizeof(Int)
-      U_ptr += sizeof(Int)
-    end
-    i += 1
-    if i > nrows(U)
-      if i > last || is_zero(R)
-        Nemo._fmpz_clear_fn(t)
-        return U
+  GC.@preserve R U begin
+    while true
+      R_ptr = Nemo.mat_entry_ptr(R, 1, 1)
+      U_ptr = Nemo.mat_entry_ptr(U, i, 1)
+    
+      for j=1:ncols(U)
+        add!(R_ptr, R_ptr, U_ptr)
+        mod_sym!(U_ptr, R_ptr, m, t)
+        sub!(R_ptr, R_ptr, U_ptr)
+        divexact!(R_ptr, R_ptr, m)
+        R_ptr += sizeof(Int)
+        U_ptr += sizeof(Int)
       end
-      while !is_zero(R)
-        if i > last
+      i += 1
+      if i > nrows(U)
+        if i > last || is_zero(R)
           Nemo._fmpz_clear_fn(t)
           return U
         end
-        U.r += 1
-        @assert U.r <= last
-        U_ptr = Nemo.mat_entry_ptr(U, i, 1)
-        R_ptr = Nemo.mat_entry_ptr(R, 1, 1)
-   
-        for j=1:ncols(U)
-          mod_sym!(U_ptr, R_ptr, m, t)
-          sub!(R_ptr, R_ptr, U_ptr)
-          divexact!(R_ptr, R_ptr, m)
-          R_ptr += sizeof(Int)
-          U_ptr += sizeof(Int)
+        while !is_zero(R)
+          if i > last
+            Nemo._fmpz_clear_fn(t)
+            return U
+          end
+          U.r += 1
+          @assert U.r <= last
+          U_ptr = Nemo.mat_entry_ptr(U, i, 1)
+          R_ptr = Nemo.mat_entry_ptr(R, 1, 1)
+     
+          for j=1:ncols(U)
+            mod_sym!(U_ptr, R_ptr, m, t)
+            sub!(R_ptr, R_ptr, U_ptr)
+            divexact!(R_ptr, R_ptr, m)
+            R_ptr += sizeof(Int)
+            U_ptr += sizeof(Int)
+          end
+          i += 1
         end
-        i += 1
+        return U
       end
-      return U
     end
   end
 end
@@ -763,13 +769,15 @@ end
 #add!(view(A, c+1:end, :), C) (but without the view)
 function add_into!(A::ZZMatrix, C::ZZMatrix, c::Int)
   A.r = max(A.r, C.r+c)
-  for i=1:nrows(C)
-    A_ptr = Nemo.mat_entry_ptr(A, i+c, 1)
-    C_ptr = Nemo.mat_entry_ptr(C, i, 1)
-    for j=1:ncols(A)
-      add!(A_ptr, C_ptr)
-      A_ptr += sizeof(Int)
-      C_ptr += sizeof(Int)
+  GC.@preserve A C begin
+    for i=1:nrows(C)
+      A_ptr = Nemo.mat_entry_ptr(A, i+c, 1)
+      C_ptr = Nemo.mat_entry_ptr(C, i, 1)
+      for j=1:ncols(A)
+        add!(A_ptr, C_ptr)
+        A_ptr += sizeof(Int)
+        C_ptr += sizeof(Int)
+      end
     end
   end
 end
@@ -777,13 +785,15 @@ end
 #sub!(view(A, c+1:end, :), C) (but without the view)
 function sub_into!(A::ZZMatrix, C::ZZMatrix, c::Int)
   A.r = max(A.r, C.r+c)
-  for i=1:nrows(C)
-    A_ptr = Nemo.mat_entry_ptr(A, i+c, 1)
-    C_ptr = Nemo.mat_entry_ptr(C, i, 1)
-    for j=1:ncols(A)
-      sub!(A_ptr, C_ptr)
-      A_ptr += sizeof(Int)
-      C_ptr += sizeof(Int)
+  GC.@preserve A C begin
+    for i=1:nrows(C)
+      A_ptr = Nemo.mat_entry_ptr(A, i+c, 1)
+      C_ptr = Nemo.mat_entry_ptr(C, i, 1)
+      for j=1:ncols(A)
+        sub!(A_ptr, C_ptr)
+        A_ptr += sizeof(Int)
+        C_ptr += sizeof(Int)
+      end
     end
   end
 end
diff --git a/src/flint/fmpz_mat.jl b/src/flint/fmpz_mat.jl
index 9db2a3495..369266aa9 100644
--- a/src/flint/fmpz_mat.jl
+++ b/src/flint/fmpz_mat.jl
@@ -1813,10 +1813,12 @@ function zero!(z::ZZMatrixOrPtr)
 end
 
 function zero_row!(z::ZZMatrix, i::Int)
-  z_ptr = mat_entry_ptr(z, i, 1)
-  for i=1:ncols(z)
-    zero!(z_ptr)
-    z_ptr += sizeof(Int)
+  GC.@preserve z begin
+    z_ptr = mat_entry_ptr(z, i, 1)
+    for i=1:ncols(z)
+      zero!(z_ptr)
+      z_ptr += sizeof(Int)
+    end
   end
   return z
 end
@@ -1842,10 +1844,12 @@ function sub!(z::ZZMatrixOrPtr, x::ZZMatrixOrPtr, y::ZZMatrixOrPtr)
 end
 
 function sub!(A::ZZMatrix, B::ZZMatrix, m::Int)
-  for i=1:nrows(A)
-    A_p = Nemo.mat_entry_ptr(A, i, i)
-    B_p = Nemo.mat_entry_ptr(B, i, i)
-    sub!(A_p, B_p, m)
+  GC.@preserve A begin
+    for i=1:nrows(A)
+      A_p = Nemo.mat_entry_ptr(A, i, i)
+      B_p = Nemo.mat_entry_ptr(B, i, i)
+      sub!(A_p, B_p, m)
+    end
   end
   return A
 end

From f53ded6ad14b73875eefab780ff2f62ca0fe04a0 Mon Sep 17 00:00:00 2001
From: Tommy Hofmann <thofma@gmail.com>
Date: Mon, 10 Feb 2025 12:11:42 +0100
Subject: [PATCH 10/11] Update src/flint/fmpz_mat.jl

---
 src/flint/fmpz_mat.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/flint/fmpz_mat.jl b/src/flint/fmpz_mat.jl
index 369266aa9..59cea50c4 100644
--- a/src/flint/fmpz_mat.jl
+++ b/src/flint/fmpz_mat.jl
@@ -1844,7 +1844,7 @@ function sub!(z::ZZMatrixOrPtr, x::ZZMatrixOrPtr, y::ZZMatrixOrPtr)
 end
 
 function sub!(A::ZZMatrix, B::ZZMatrix, m::Int)
-  GC.@preserve A begin
+  GC.@preserve A B begin
     for i=1:nrows(A)
       A_p = Nemo.mat_entry_ptr(A, i, i)
       B_p = Nemo.mat_entry_ptr(B, i, i)

From 27a930e4e885b5d2314103bf25efab73821d361c Mon Sep 17 00:00:00 2001
From: Claus Fieker <fieker@mathematik.uni-kl.de>
Date: Mon, 10 Feb 2025 12:22:39 +0100
Subject: [PATCH 11/11] add a clear

---
 src/ZZMatrix-linalg.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/ZZMatrix-linalg.jl b/src/ZZMatrix-linalg.jl
index 32c6da90b..6e0256ab0 100644
--- a/src/ZZMatrix-linalg.jl
+++ b/src/ZZMatrix-linalg.jl
@@ -282,7 +282,7 @@ end
 
 #adaptive rational_reconstruction: if solution is unbalanced with
 #denominator smaller than numerator
-function induce_rational_reconstruction(a::ZZMatrix, b::ZZRingElem)
+function induce_rational_reconstruction(a::ZZMatrix, b::ZZRingElem; ErrorTolerant ::Bool = false)
   A = similar(a)
 
   T = ZZRingElem(Val(:raw))
@@ -754,6 +754,7 @@ function _renorm(U::ZZMatrix, m::ZZRingElem; start::Int = 1, last::Int = nrows(U
           end
           i += 1
         end
+        Nemo._fmpz_clear_fn(t)
         return U
       end
     end