From 01763aaa1d828155c1688c604377adba80c9efd0 Mon Sep 17 00:00:00 2001 From: ZHG Date: Fri, 5 Jul 2019 16:15:52 +0200 Subject: [PATCH 1/7] Created the omp cra although named as cra-paladinhowever cra-domain-omp does not compile if any program refers to it --- linbox/algorithms/cra-domain-omp.h | 7 +- linbox/algorithms/cra-paladin.h | 266 +++++++++++++++++++++++++++++ linbox/solutions/methods.h | 1 + linbox/solutions/solve/solve-cra.h | 6 +- tests/test-solve-full.C | 15 +- 5 files changed, 285 insertions(+), 10 deletions(-) create mode 100644 linbox/algorithms/cra-paladin.h diff --git a/linbox/algorithms/cra-domain-omp.h b/linbox/algorithms/cra-domain-omp.h index 32f65c6e9a..2120aaa3ca 100644 --- a/linbox/algorithms/cra-domain-omp.h +++ b/linbox/algorithms/cra-domain-omp.h @@ -39,6 +39,7 @@ #include #include #include "linbox/algorithms/cra-domain-sequential.h" +#include "linbox/algorithms/rational-cra.h" namespace LinBox { @@ -90,10 +91,10 @@ namespace LinBox #pragma omp parallel for for(size_t i=0;iiterCount() << " primes." << std::endl; diff --git a/linbox/algorithms/cra-paladin.h b/linbox/algorithms/cra-paladin.h new file mode 100644 index 0000000000..06b976cdfe --- /dev/null +++ b/linbox/algorithms/cra-paladin.h @@ -0,0 +1,266 @@ +/* linbox/algorithms/cra-domain-omp.h + * Copyright (C) 1999-2010 The LinBox group + * + * Updated by Hongguang Zhu + * + * + * Naive parallel chinese remaindering + * Launch NN iterations in parallel, where NN=omp_get_num_threads() + * Then synchronization and termintation test. + * Time-stamp: <13 Mar 12 13:49:58 Jean-Guillaume.Dumas@imag.fr> + * + * ========LICENCE======== + * This file is part of the library LinBox. + * + * LinBox is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * ========LICENCE======== + */ + +/*! @file algorithms/cra-domain-omp.h + * @brief Parallel (OMP) version of \ref CRA + * @ingroup CRA + */ + +#ifndef __LINBOX_omp_cra_H +#define __LINBOX_omp_cra_H +// commentator is not thread safe +#define DISABLE_COMMENTATOR +#include + +#include +#include "linbox/randiter/random-prime.h" +#include "linbox/algorithms/rational-cra.h" + +namespace LinBox +{ + + template + struct ChineseRemainderOMP { + typedef typename CRABase::Domain Domain; + typedef typename CRABase::DomainElement DomainElement; + typedef ChineseRemainderSequential Father_t; + + CRABase Builder_; + double B; + + template + ChineseRemainderOMP(const Param& b) : + Builder_(b), B(b) + {} + + ChineseRemainderOMP(const CRABase& b) : + Builder_(b) + {} + + template + Integer& operator() (Integer& res, Function& Iteration, PrimeIterator& primeiter) + { + + size_t NN; +#pragma omp parallel +#pragma omp single + NN = omp_get_num_threads(); + + // commentator().start ("Parallel OMP Givaro::Modular iteration", "mmcrait"); + // if (NN == 1) return Father_t::operator()(res,Iteration,primeiter); + + this->para_compute(Iteration); + + // commentator().stop ("done", NULL, "mmcrait"); + + return this->Builder_.result(res); + } + + // @fixme REMOVE ? + template + Container& operator() (Container& res, Function& Iteration, PrimeIterator& primeiter) + { + + size_t NN; +#pragma omp parallel +#pragma omp single + NN = omp_get_num_threads(); + + // commentator().start ("Parallel OMP Givaro::Modular iteration", "mmcrait"); + // if (NN == 1) return Father_t::operator()(res,Iteration,primeiter); + + this->para_compute(Iteration); + + // commentator().stop ("done", NULL, "mmcrait"); + + return this->Builder_.result(res); + } + + + template + Integer& operator() (Integer& res, Integer& den, Function& Iteration, PrimeIterator& primeiter) + { + //! @bug why why why ??? + /** erreur: ‘omp_get_num_threads’ has not been declared + * ../linbox/algorithms/cra-domain-omp.h:152:16: note: suggested alternative: + * /usr/lib/gcc/x86_64-linux-gnu/4.6/include/omp.h:64:12: note: ‘Givaro::omp_get_num_threads’ + */ + size_t NN; +#pragma omp parallel + NN = omp_get_num_threads(); + + // commentator().start ("Parallel OMP Givaro::Modular iteration", "mmcrait"); + // if (NN == 1) return Father_t::operator()(res, den, Iteration,primeiter); + + this->para_compute(Iteration); + + // commentator().stop ("done", NULL, "mmcrait"); + + return this->Builder_.result(res,den); + } + + + template + Vect& operator() (Vect& num, Vect& den, Function& Iteration, PrimeIterator& primeiter) + { + //! @bug why why why ??? + /** erreur: ‘omp_get_num_threads’ has not been declared + * ../linbox/algorithms/cra-domain-omp.h:152:16: note: suggested alternative: + * /usr/lib/gcc/x86_64-linux-gnu/4.6/include/omp.h:64:12: note: ‘Givaro::omp_get_num_threads’ + */ + size_t NN; +#pragma omp parallel + NN = omp_get_num_threads(); + + // commentator().start ("Parallel OMP Givaro::Modular iteration", "mmcrait"); + // if (NN == 1) return Father_t::operator()(res, den, Iteration,primeiter); + + this->para_compute>(Iteration); // @fixme Should be rebind of Vect into Domain + + // commentator().stop ("done", NULL, "mmcrait"); + + return this->Builder_.result(num,den); + } + + + template + void para_compute(Function Iteration){ // @fixme Missing &? + size_t NN; +#pragma omp parallel +#pragma omp single + NN = omp_get_num_threads(); + +// typedef typename CRATemporaryVectorTrait::Type_t ElementContainer; + + std::vector ROUNDresidues;ROUNDresidues.resize(NN); + std::vector ROUNDdomains;ROUNDdomains.resize(NN); + //std::vector> m_primeiters;m_primeiters.reserve(NN); + std::vector> m_primeiters;m_primeiters.reserve(NN); + + std::vector vBuilders;vBuilders.reserve(NN); + + + for(auto j=0u;j m_primeiter( j, NN); + LinBox::MaskedPrimeIterator m_primeiter( j, NN); + ++m_primeiter; m_primeiters.push_back(m_primeiter); + + CRABase Builder_(B);vBuilders.push_back(Builder_); + + } + + this->compute_task(m_primeiters, Iteration, ROUNDdomains, + ROUNDresidues, vBuilders); + + + } + + + template< class Function, class PrimeIterator, class ResidueType>//, class ElementContainer> + void solve_with_prime(std::vector& m_primeiters, + Function& Iteration, std::vector& ROUNDdomains, + std::vector& ROUNDresidues, std::vector& vBuilders) + { + ++m_primeiters[ omp_get_thread_num()]; + + while(vBuilders[ omp_get_thread_num()].noncoprime(*m_primeiters[ omp_get_thread_num()]) ) + ++m_primeiters[ omp_get_thread_num()]; + + + ROUNDdomains[ omp_get_thread_num()] = Domain(*m_primeiters[ omp_get_thread_num()]); + + Iteration(ROUNDresidues[ omp_get_thread_num()], ROUNDdomains[ omp_get_thread_num()]); + + } + + + template//, class ElementContainer> + void compute_task(std::vector& m_primeiters, + Function& Iteration, std::vector& ROUNDdomains, + std::vector& ROUNDresidues, std::vector& vBuilders) + { + + size_t NN; +#pragma omp parallel +#pragma omp single + NN = omp_get_num_threads(); + long Niter=std::ceil(1.442695040889*B/(double)(m_primeiters[0].getBits()-1)); + + bool need2Init=true; + +#pragma omp parallel for num_threads(NN) schedule(dynamic,1) + for(auto j=0;jBuilder_.initialize( ROUNDdomains[ omp_get_thread_num()], ROUNDresidues[ omp_get_thread_num()]); + need2Init=false; + }else{ + this->Builder_.progress( ROUNDdomains[ omp_get_thread_num()], ROUNDresidues[ omp_get_thread_num()]); + + } + + } + } + + template + Container& operator() (Container& res, Integer& den, Function& Iteration, PrimeIterator& primeiter) + { + + size_t NN; +#pragma omp parallel +#pragma omp single + NN = omp_get_num_threads(); + // commentator().start ("Parallel OMP Givaro::Modular iteration", "mmcrait"); + //if (NN == 1) return Father_t::operator()(res, den, Iteration,primeiter); + + this->para_compute>(Iteration); // @fixme SHould be container -> rebind + + // commentator().stop ("done", NULL, "mmcrait"); + + return this->Builder_.result(res,den); + + } + + }; + +} +#endif //__LINBOX_omp_cra_H + +// Local Variables: +// mode: C++ +// tab-width: 4 +// indent-tabs-mode: nil +// c-basic-offset: 4 +// End: +// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s diff --git a/linbox/solutions/methods.h b/linbox/solutions/methods.h index 881de19599..500e8fb03d 100644 --- a/linbox/solutions/methods.h +++ b/linbox/solutions/methods.h @@ -109,6 +109,7 @@ namespace LinBox { SMP, //!< Use symmetric multiprocessing (Paladin) to do sub-computations. Distributed, //!< Use MPI to distribute sub-computations accross nodes. Combined, //!< Use MPI then Paladin on each node. + Paladin, //!< Use Paladin for multithreading. }; /** diff --git a/linbox/solutions/solve/solve-cra.h b/linbox/solutions/solve/solve-cra.h index 2141b39dbd..ff9a72855f 100644 --- a/linbox/solutions/solve/solve-cra.h +++ b/linbox/solutions/solve/solve-cra.h @@ -31,7 +31,7 @@ */ #pragma once - +#include #include #include #include @@ -180,6 +180,10 @@ namespace LinBox { cra(num, den, iteration, primeGenerator); } #endif + else if (dispatch == Dispatch::Paladin) { + LinBox::ChineseRemainderOMP cra(hadamardLogBound); + cra(num, den, iteration, primeGenerator); + } else { throw LinBox::NotImplementedYet("Integer CRA Solve with specified dispatch type is not implemented yet."); } diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index 1739664c66..fc6ae4ad6a 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -221,7 +221,7 @@ int main(int argc, char** argv) {'B', "-B", "Vector bit size for rational solve tests (defaults to -b if not specified).", TYPE_INT, &vectorBitSize}, {'m', "-m", "Row dimension of matrices.", TYPE_INT, &m}, {'n', "-n", "Column dimension of matrices.", TYPE_INT, &n}, - {'d', "-d", "Dispatch mode (either Auto, Sequential, SMP or Distributed).", TYPE_STR, &dispatchString}, + {'d', "-d", "Dispatch mode (either Auto, Sequential, SMP, Paladin or Distributed).", TYPE_STR, &dispatchString}, END_OF_ARGUMENTS}; parseArguments(argc, argv, args); @@ -239,8 +239,10 @@ int main(int argc, char** argv) method.dispatch = Dispatch::Sequential; else if (dispatchString == "SMP") method.dispatch = Dispatch::SMP; + else if (dispatchString == "Paladin") + method.dispatch = Dispatch::Paladin; else if (dispatchString != "Auto") { - std::cerr << "-d Dispatch mode should be either Auto, Sequential, SMP or Distributed" << std::endl; + std::cerr << "-d Dispatch mode should be either Auto, Sequential, SMP, Paladin or Distributed" << std::endl; return EXIT_FAILURE; } @@ -262,6 +264,7 @@ int main(int argc, char** argv) bool ok = true; do { +/* // ----- Rational Auto ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); ok = ok && test_sparse_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); @@ -271,13 +274,13 @@ int main(int argc, char** argv) ok = ok && test_dense_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); ok = ok && test_sparse_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_blackbox_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - +*/ // ----- Rational CRA // @fixme @bug When bitSize = 5 and vectorBitSize = 50, CRA fails ok = ok && test_dense_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - ok = ok && test_sparse_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); +// ok = ok && test_sparse_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_blackbox_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - +/* ok = ok && test_dense_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); ok = ok && test_sparse_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_blackbox_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); @@ -347,7 +350,7 @@ int main(int argc, char** argv) // ok = ok && test_dense_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); // ok = ok && test_sparse_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); // ok = ok && test_blackbox_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); - +*/ if (!ok) { std::cerr << "Failed with seed: " << seed << std::endl; } From c7a1324500890755d3fc5ecc650feceff3993eb9 Mon Sep 17 00:00:00 2001 From: ZHG Date: Mon, 8 Jul 2019 16:21:10 +0200 Subject: [PATCH 2/7] Multithreaded CRA added with Paladin --- benchmarks/benchmark-dense-solve.C | 3 +- linbox/algorithms/cra-paladin.h | 210 ++++++++++------------------- linbox/solutions/solve/solve-cra.h | 2 +- 3 files changed, 72 insertions(+), 143 deletions(-) diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index 504cb69e06..482a354624 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -143,7 +143,7 @@ int main(int argc, char** argv) {'q', "-q", "Set the field characteristic (-1 for rationals).", TYPE_INTEGER, &args.q}, {'n', "-n", "Set the matrix dimension.", TYPE_INT, &args.n}, {'b', "-b", "bit size", TYPE_INT, &args.bits}, - {'d', "-d", "Dispatch mode (any of: Auto, Sequential, SMP, Distributed).", TYPE_STR, &args.dispatchString}, + {'d', "-d", "Dispatch mode (any of: Auto, Sequential, SMP, Distributed, Paladin).", TYPE_STR, &args.dispatchString}, {'M', "-M", "Choose the solve method (any of: Auto, Elimination, DenseElimination, SparseElimination, " "Dixon, CRA, SymbolicNumericOverlap, SymbolicNumericNorm, " @@ -162,6 +162,7 @@ int main(int argc, char** argv) MethodBase method; method.pCommunicator = &communicator; if (args.dispatchString == "Sequential") method.dispatch = Dispatch::Sequential; + else if (args.dispatchString == "Paladin") method.dispatch = Dispatch::Paladin; else if (args.dispatchString == "SMP") method.dispatch = Dispatch::SMP; else if (args.dispatchString == "Distributed") method.dispatch = Dispatch::Distributed; else method.dispatch = Dispatch::Auto; diff --git a/linbox/algorithms/cra-paladin.h b/linbox/algorithms/cra-paladin.h index 06b976cdfe..bc30d81df4 100644 --- a/linbox/algorithms/cra-paladin.h +++ b/linbox/algorithms/cra-paladin.h @@ -36,7 +36,7 @@ #ifndef __LINBOX_omp_cra_H #define __LINBOX_omp_cra_H // commentator is not thread safe -#define DISABLE_COMMENTATOR +//#define DISABLE_COMMENTATOR #include #include @@ -45,206 +45,134 @@ namespace LinBox { - + template - struct ChineseRemainderOMP { + struct RationalChineseRemainderPaladin { typedef typename CRABase::Domain Domain; typedef typename CRABase::DomainElement DomainElement; - typedef ChineseRemainderSequential Father_t; - + typedef RationalChineseRemainder Father_t; + CRABase Builder_; - double B; - + double HB;//hadamard bound + int Niter; + template - ChineseRemainderOMP(const Param& b) : - Builder_(b), B(b) - {} - - ChineseRemainderOMP(const CRABase& b) : - Builder_(b) + RationalChineseRemainderPaladin(const Param& b) : + Builder_(b), HB(b), Niter(0) {} template Integer& operator() (Integer& res, Function& Iteration, PrimeIterator& primeiter) { - - size_t NN; -#pragma omp parallel -#pragma omp single - NN = omp_get_num_threads(); - // commentator().start ("Parallel OMP Givaro::Modular iteration", "mmcrait"); - // if (NN == 1) return Father_t::operator()(res,Iteration,primeiter); - - this->para_compute(Iteration); - + this->para_compute(Iteration,primeiter); // commentator().stop ("done", NULL, "mmcrait"); return this->Builder_.result(res); } - + // @fixme REMOVE ? template Container& operator() (Container& res, Function& Iteration, PrimeIterator& primeiter) { - - size_t NN; -#pragma omp parallel -#pragma omp single - NN = omp_get_num_threads(); - // commentator().start ("Parallel OMP Givaro::Modular iteration", "mmcrait"); - // if (NN == 1) return Father_t::operator()(res,Iteration,primeiter); - - this->para_compute(Iteration); - + this->para_compute(Iteration,primeiter); + // commentator().stop ("done", NULL, "mmcrait"); return this->Builder_.result(res); } - - + + template Integer& operator() (Integer& res, Integer& den, Function& Iteration, PrimeIterator& primeiter) { - //! @bug why why why ??? - /** erreur: ‘omp_get_num_threads’ has not been declared - * ../linbox/algorithms/cra-domain-omp.h:152:16: note: suggested alternative: - * /usr/lib/gcc/x86_64-linux-gnu/4.6/include/omp.h:64:12: note: ‘Givaro::omp_get_num_threads’ - */ - size_t NN; -#pragma omp parallel - NN = omp_get_num_threads(); - - // commentator().start ("Parallel OMP Givaro::Modular iteration", "mmcrait"); - // if (NN == 1) return Father_t::operator()(res, den, Iteration,primeiter); - - this->para_compute(Iteration); - + + this->para_compute(Iteration,primeiter); + // commentator().stop ("done", NULL, "mmcrait"); - + return this->Builder_.result(res,den); } - - + + template Vect& operator() (Vect& num, Vect& den, Function& Iteration, PrimeIterator& primeiter) { - //! @bug why why why ??? - /** erreur: ‘omp_get_num_threads’ has not been declared - * ../linbox/algorithms/cra-domain-omp.h:152:16: note: suggested alternative: - * /usr/lib/gcc/x86_64-linux-gnu/4.6/include/omp.h:64:12: note: ‘Givaro::omp_get_num_threads’ - */ - size_t NN; -#pragma omp parallel - NN = omp_get_num_threads(); - - // commentator().start ("Parallel OMP Givaro::Modular iteration", "mmcrait"); - // if (NN == 1) return Father_t::operator()(res, den, Iteration,primeiter); - - this->para_compute>(Iteration); // @fixme Should be rebind of Vect into Domain - + + this->para_compute>(Iteration,primeiter); // @fixme Should be rebind of Vect into Domain + // commentator().stop ("done", NULL, "mmcrait"); return this->Builder_.result(num,den); } - - - template - void para_compute(Function Iteration){ // @fixme Missing &? - size_t NN; -#pragma omp parallel -#pragma omp single - NN = omp_get_num_threads(); - -// typedef typename CRATemporaryVectorTrait::Type_t ElementContainer; - - std::vector ROUNDresidues;ROUNDresidues.resize(NN); - std::vector ROUNDdomains;ROUNDdomains.resize(NN); - //std::vector> m_primeiters;m_primeiters.reserve(NN); - std::vector> m_primeiters;m_primeiters.reserve(NN); - - std::vector vBuilders;vBuilders.reserve(NN); - for(auto j=0u;j m_primeiter( j, NN); - LinBox::MaskedPrimeIterator m_primeiter( j, NN); - ++m_primeiter; m_primeiters.push_back(m_primeiter); - - CRABase Builder_(B);vBuilders.push_back(Builder_); + template + void para_compute(Function &Iteration, PrimeIterator& primeiter){ + this->Niter = std::ceil(1.442695040889*HB/(double)(primeiter.getBits()-1)); + std::vector ROUNDresidues; + ROUNDresidues.resize(this->Niter); + LinBox::PrimeIterator gen(primeiter.getBits()); + std::vector m_primeiters;m_primeiters.reserve(this->Niter); + for(auto j=0;jBuilder_.noncoprime(*gen) ) + ++primeiter; + m_primeiters.push_back(*gen); } - - this->compute_task(m_primeiters, Iteration, ROUNDdomains, - ROUNDresidues, vBuilders); + + this->compute_task(m_primeiters, Iteration, ROUNDresidues); } - template< class Function, class PrimeIterator, class ResidueType>//, class ElementContainer> - void solve_with_prime(std::vector& m_primeiters, - Function& Iteration, std::vector& ROUNDdomains, - std::vector& ROUNDresidues, std::vector& vBuilders) + template< class Function, class ResidueType> + void solve_with_prime(int m_primeiter, Function& Iteration, ResidueType& ROUNDresidue) { - ++m_primeiters[ omp_get_thread_num()]; - - while(vBuilders[ omp_get_thread_num()].noncoprime(*m_primeiters[ omp_get_thread_num()]) ) - ++m_primeiters[ omp_get_thread_num()]; - - - ROUNDdomains[ omp_get_thread_num()] = Domain(*m_primeiters[ omp_get_thread_num()]); - - Iteration(ROUNDresidues[ omp_get_thread_num()], ROUNDdomains[ omp_get_thread_num()]); + Domain D(m_primeiter); + Iteration(ROUNDresidue, D); } - template//, class ElementContainer> + template void compute_task(std::vector& m_primeiters, - Function& Iteration, std::vector& ROUNDdomains, - std::vector& ROUNDresidues, std::vector& vBuilders) + Function& Iteration, std::vector& ROUNDresidues) { - size_t NN; -#pragma omp parallel -#pragma omp single - NN = omp_get_num_threads(); - long Niter=std::ceil(1.442695040889*B/(double)(m_primeiters[0].getBits()-1)); - - bool need2Init=true; - -#pragma omp parallel for num_threads(NN) schedule(dynamic,1) - for(auto j=0;jBuilder_.initialize( ROUNDdomains[ omp_get_thread_num()], ROUNDresidues[ omp_get_thread_num()]); - need2Init=false; - }else{ - this->Builder_.progress( ROUNDdomains[ omp_get_thread_num()], ROUNDresidues[ omp_get_thread_num()]); - - } - - } + long NN = this->Niter; + PAR_BLOCK{ + auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); + SYNCH_GROUP({ + FORBLOCK1D(iter, NN, sp,{ + TASK(MODE(CONSTREFERENCE(m_primeiters,Iteration,ROUNDresidues)),{ + for(auto j=iter.begin(); j!=iter.end(); ++j) + { + + solve_with_prime(m_primeiters[j], Iteration, ROUNDresidues[j]); + } + }) + }) + + }) + } + Domain D(m_primeiters[0]); + this->Builder_.initialize( D, ROUNDresidues[0]); + for(auto j=1;jBuilder_.progress( D, ROUNDresidues[j]); + } } + template Container& operator() (Container& res, Integer& den, Function& Iteration, PrimeIterator& primeiter) { - size_t NN; -#pragma omp parallel -#pragma omp single - NN = omp_get_num_threads(); - // commentator().start ("Parallel OMP Givaro::Modular iteration", "mmcrait"); - //if (NN == 1) return Father_t::operator()(res, den, Iteration,primeiter); - - this->para_compute>(Iteration); // @fixme SHould be container -> rebind + this->para_compute>(Iteration,primeiter); // @fixme SHould be container -> rebind // commentator().stop ("done", NULL, "mmcrait"); diff --git a/linbox/solutions/solve/solve-cra.h b/linbox/solutions/solve/solve-cra.h index ff9a72855f..4e00e79fba 100644 --- a/linbox/solutions/solve/solve-cra.h +++ b/linbox/solutions/solve/solve-cra.h @@ -181,7 +181,7 @@ namespace LinBox { } #endif else if (dispatch == Dispatch::Paladin) { - LinBox::ChineseRemainderOMP cra(hadamardLogBound); + LinBox::RationalChineseRemainderPaladin cra(hadamardLogBound); cra(num, den, iteration, primeGenerator); } else { From 94baf7d9c4a21754e5c3bd39545a439d78f7cb7e Mon Sep 17 00:00:00 2001 From: ZHG Date: Tue, 9 Jul 2019 14:40:54 +0200 Subject: [PATCH 3/7] Ready to do benchmark --- benchmarks/benchmark-dense-solve.C | 36 +++++++++++++++++------------- linbox/algorithms/cra-paladin.h | 6 ++--- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index 482a354624..31e2c62a31 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -53,6 +53,7 @@ namespace { int nbiter = 3; int n = 500; int bits = 10; + int seed = -1; std::string dispatchString = "Auto"; std::string methodString = "Auto"; }; @@ -71,10 +72,10 @@ namespace { } template > -void benchmark(std::pair& timebits, Arguments& args, MethodBase& method) +void benchmark(std::array& timebits, Arguments& args, MethodBase& method) { - Field F(args.q); // q is ignored for Integers - typename Field::RandIter randIter(F, args.bits); // bits is ignored for ModularRandIter + Field F(args.q); // q is ignored for Integers + typename Field::RandIter randIter(F, args.seed, args.bits); // bits is ignored for ModularRandIter #ifdef _BENCHMARKS_DEBUG_ std::clog << "Setting A ... " << std::endl; @@ -127,12 +128,9 @@ void benchmark(std::pair& timebits, Arguments& args, MethodBase& if (method.master()) { chrono.stop(); -#ifdef _BENCHMARKS_DEBUG_ - printVector(std::clog << "(DenseElimination) Solution is ", F, X) << std::endl; -#endif - - setBitsize(timebits.second, args.q, X); - timebits.first = chrono.usertime(); + timebits[0] = chrono.usertime(); + timebits[1] = chrono.realtime(); + setBitsize(timebits[2], args.q, X); } } @@ -143,7 +141,8 @@ int main(int argc, char** argv) {'q', "-q", "Set the field characteristic (-1 for rationals).", TYPE_INTEGER, &args.q}, {'n', "-n", "Set the matrix dimension.", TYPE_INT, &args.n}, {'b', "-b", "bit size", TYPE_INT, &args.bits}, - {'d', "-d", "Dispatch mode (any of: Auto, Sequential, SMP, Distributed, Paladin).", TYPE_STR, &args.dispatchString}, + {'s', "-s", "Seed for randomness.", TYPE_INT, &args.seed}, + {'d', "-d", "Dispatch mode (any of: Auto, Sequential, SMP, Paladin, Distributed).", TYPE_STR, &args.dispatchString}, {'M', "-M", "Choose the solve method (any of: Auto, Elimination, DenseElimination, SparseElimination, " "Dixon, CRA, SymbolicNumericOverlap, SymbolicNumericNorm, " @@ -152,6 +151,10 @@ int main(int argc, char** argv) END_OF_ARGUMENTS}; LinBox::parseArguments(argc, argv, as); + if (args.seed < 0) { + args.seed = time(nullptr); + } + // Setting up context Communicator communicator(&argc, &argv); @@ -162,8 +165,8 @@ int main(int argc, char** argv) MethodBase method; method.pCommunicator = &communicator; if (args.dispatchString == "Sequential") method.dispatch = Dispatch::Sequential; - else if (args.dispatchString == "Paladin") method.dispatch = Dispatch::Paladin; else if (args.dispatchString == "SMP") method.dispatch = Dispatch::SMP; + else if (args.dispatchString == "Paladin") method.dispatch = Dispatch::Paladin; else if (args.dispatchString == "Distributed") method.dispatch = Dispatch::Distributed; else method.dispatch = Dispatch::Auto; @@ -172,7 +175,7 @@ int main(int argc, char** argv) bool isModular = false; if (args.q > 0) isModular = true; - using Timing = std::pair; + using Timing = std::array; std::vector timebits(args.nbiter); for (int iter = 0; iter < args.nbiter; ++iter) { if (isModular) { @@ -185,16 +188,19 @@ int main(int argc, char** argv) } #ifdef _BENCHMARKS_DEBUG_ - for (const auto& it : timebits) std::clog << it.first << "s, " << it.second << " bits" << std::endl; + for (const auto& it : timebits) std::clog << it[0] << "s, " << it[2] << " bits" << std::endl; #endif if (method.master()) { - std::sort(timebits.begin(), timebits.end(), [](const Timing& a, const Timing& b) -> bool { return a.first > b.first; }); + std::sort(timebits.begin(), timebits.end(), [](const Timing& a, const Timing& b) -> bool { return a[0] > b[0]; }); - std::cout << "Time: " << timebits[args.nbiter / 2].first << " Bitsize: " << timebits[args.nbiter / 2].second; + std::cout << "UserTime: " << timebits[args.nbiter / 2][0]; + std::cout << " RealTime: " << timebits[args.nbiter / 2][1]; + std::cout << " Bitsize: " << timebits[args.nbiter / 2][2]; FFLAS::writeCommandString(std::cout, as) << std::endl; } return 0; } + diff --git a/linbox/algorithms/cra-paladin.h b/linbox/algorithms/cra-paladin.h index bc30d81df4..a6eed2cc2b 100644 --- a/linbox/algorithms/cra-paladin.h +++ b/linbox/algorithms/cra-paladin.h @@ -67,7 +67,7 @@ namespace LinBox this->para_compute(Iteration,primeiter); // commentator().stop ("done", NULL, "mmcrait"); - + return this->Builder_.result(res); } @@ -79,7 +79,7 @@ namespace LinBox this->para_compute(Iteration,primeiter); // commentator().stop ("done", NULL, "mmcrait"); - + return this->Builder_.result(res); } @@ -103,7 +103,7 @@ namespace LinBox this->para_compute>(Iteration,primeiter); // @fixme Should be rebind of Vect into Domain // commentator().stop ("done", NULL, "mmcrait"); - + return this->Builder_.result(num,den); } From 8d990871be1731b9fed26213d6d8154516b3dfd3 Mon Sep 17 00:00:00 2001 From: ZHG Date: Mon, 15 Jul 2019 11:43:32 +0200 Subject: [PATCH 4/7] Adopted MODE for FOR1D in Paladin --- linbox/algorithms/cra-paladin.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/linbox/algorithms/cra-paladin.h b/linbox/algorithms/cra-paladin.h index a6eed2cc2b..a5cf09512e 100644 --- a/linbox/algorithms/cra-paladin.h +++ b/linbox/algorithms/cra-paladin.h @@ -144,6 +144,7 @@ namespace LinBox { long NN = this->Niter; +#if 0 PAR_BLOCK{ auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); SYNCH_GROUP({ @@ -159,6 +160,16 @@ namespace LinBox }) } +#else + PAR_BLOCK{ + auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); + FOR1D(iter, NN, sp, MODE(CONSTREFERENCE(m_primeiters,Iteration,ROUNDresidues)), + { + solve_with_prime(m_primeiters[iter], Iteration, ROUNDresidues[iter]); + }) + + } +#endif Domain D(m_primeiters[0]); this->Builder_.initialize( D, ROUNDresidues[0]); for(auto j=1;j Date: Mon, 5 Aug 2019 11:47:29 +0200 Subject: [PATCH 5/7] Cleaned up for code review --- linbox/util/mpicpp.inl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/linbox/util/mpicpp.inl b/linbox/util/mpicpp.inl index 203f456fa9..ad14a57627 100644 --- a/linbox/util/mpicpp.inl +++ b/linbox/util/mpicpp.inl @@ -137,6 +137,12 @@ namespace LinBox { unserialize(value, bytes); } } + //Specialization for Bcast with only one boolean value data + template <> void Communicator::bcast(bool& value, int src) + { + MPI_Bcast(&value, 1, MPI::BOOL, src, _comm); + + } } // Local Variables: From 57d466e0dc4edfb1ed40f5b17a2537da7b3a7202 Mon Sep 17 00:00:00 2001 From: ZHG Date: Mon, 5 Aug 2019 12:18:28 +0200 Subject: [PATCH 6/7] Quick fix for the compiling time error with minpoly example program as the communicatorp for Blackbox no longer exists --- examples/minpoly.C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/minpoly.C b/examples/minpoly.C index 395caa76f9..0e0579071d 100644 --- a/examples/minpoly.C +++ b/examples/minpoly.C @@ -75,7 +75,7 @@ int main (int argc, char **argv) #ifdef __LINBOX_HAVE_MPI Communicator C(&argc, &argv); process = C.rank(); - M.communicatorp(&C); + //M.communicatorp(&C); #endif Givaro::ZRing ZZ; From 1d86c9fc403ac2bc04e68e467755b7548c194ee9 Mon Sep 17 00:00:00 2001 From: ZHG Date: Wed, 28 Aug 2019 12:53:16 +0200 Subject: [PATCH 7/7] cleaned up as required during the 1st code review --- benchmarks/benchmark-dense-solve.C | 3 +-- linbox/solutions/methods.h | 1 - tests/test-solve-full.C | 12 +++++------- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index 48910da0f6..88e7158a0f 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -144,7 +144,7 @@ int main(int argc, char** argv) {'n', "-n", "Set the matrix dimension.", TYPE_INT, &args.n}, {'b', "-b", "bit size", TYPE_INT, &args.bits}, {'s', "-s", "Seed for randomness.", TYPE_INT, &args.seed}, - {'d', "-d", "Dispatch mode (any of: Auto, Sequential, SMP, Distributed).", TYPE_STR, &args.dispatchString}, + {'d', "-d", "Dispatch mode (any of: Auto, Sequential, Distributed).", TYPE_STR, &args.dispatchString}, {'t', "-t", "Number of threads.", TYPE_INT, &numThreads }, {'M', "-M", "Choose the solve method (any of: Auto, Elimination, DenseElimination, SparseElimination, " @@ -173,7 +173,6 @@ int main(int argc, char** argv) MethodBase method; method.pCommunicator = &communicator; if (args.dispatchString == "Sequential") method.dispatch = Dispatch::Sequential; - else if (args.dispatchString == "SMP") method.dispatch = Dispatch::SMP; else if (args.dispatchString == "Paladin") method.dispatch = Dispatch::Paladin; else if (args.dispatchString == "Distributed") method.dispatch = Dispatch::Distributed; else method.dispatch = Dispatch::Auto; diff --git a/linbox/solutions/methods.h b/linbox/solutions/methods.h index 500e8fb03d..1f6ed3459b 100644 --- a/linbox/solutions/methods.h +++ b/linbox/solutions/methods.h @@ -106,7 +106,6 @@ namespace LinBox { enum class Dispatch { Auto, //!< Let implementation decide what to use. Sequential, //!< All sub-computations are done sequentially. - SMP, //!< Use symmetric multiprocessing (Paladin) to do sub-computations. Distributed, //!< Use MPI to distribute sub-computations accross nodes. Combined, //!< Use MPI then Paladin on each node. Paladin, //!< Use Paladin for multithreading. diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index fc6ae4ad6a..cc64a95cf9 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -221,7 +221,7 @@ int main(int argc, char** argv) {'B', "-B", "Vector bit size for rational solve tests (defaults to -b if not specified).", TYPE_INT, &vectorBitSize}, {'m', "-m", "Row dimension of matrices.", TYPE_INT, &m}, {'n', "-n", "Column dimension of matrices.", TYPE_INT, &n}, - {'d', "-d", "Dispatch mode (either Auto, Sequential, SMP, Paladin or Distributed).", TYPE_STR, &dispatchString}, + {'d', "-d", "Dispatch mode (either Auto, Sequential, Paladin or Distributed).", TYPE_STR, &dispatchString}, END_OF_ARGUMENTS}; parseArguments(argc, argv, args); @@ -237,8 +237,6 @@ int main(int argc, char** argv) method.dispatch = Dispatch::Distributed; else if (dispatchString == "Sequential") method.dispatch = Dispatch::Sequential; - else if (dispatchString == "SMP") - method.dispatch = Dispatch::SMP; else if (dispatchString == "Paladin") method.dispatch = Dispatch::Paladin; else if (dispatchString != "Auto") { @@ -264,7 +262,7 @@ int main(int argc, char** argv) bool ok = true; do { -/* + // ----- Rational Auto ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); ok = ok && test_sparse_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); @@ -274,13 +272,13 @@ int main(int argc, char** argv) ok = ok && test_dense_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); ok = ok && test_sparse_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_blackbox_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); -*/ + // ----- Rational CRA // @fixme @bug When bitSize = 5 and vectorBitSize = 50, CRA fails ok = ok && test_dense_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_sparse_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_blackbox_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); -/* + ok = ok && test_dense_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); ok = ok && test_sparse_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_blackbox_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); @@ -350,7 +348,7 @@ int main(int argc, char** argv) // ok = ok && test_dense_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); // ok = ok && test_sparse_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); // ok = ok && test_blackbox_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); -*/ + if (!ok) { std::cerr << "Failed with seed: " << seed << std::endl; }