diff --git a/CHANGELOG.md b/CHANGELOG.md index 3486a8878..2e4617385 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ Unreleased ### Fixed - Fix benchmark result token - Fix set_ptr bug +- Fix linear solver bug for non-symmetric matrix ### Changed - Update cuda version of allgebra diff --git a/src/internal/lapack/getrs/dense_double_getrs.cpp b/src/internal/lapack/getrs/dense_double_getrs.cpp index f8ddbe2df..9df7fabbb 100644 --- a/src/internal/lapack/getrs/dense_double_getrs.cpp +++ b/src/internal/lapack/getrs/dense_double_getrs.cpp @@ -26,7 +26,7 @@ int internal::lapack::getrs(const matrix::Dense &A, vector &B, const double *Ad = A.data(); double *Bd = B.data(); const int *ipivd = ipiv.data(); - const char trans = 'N'; + const char trans = 'T'; if (A.get_device_mem_stat() == true && B.get_device_mem_stat() == true) { #if MONOLISH_USE_NVIDIA_GPU @@ -42,7 +42,11 @@ int internal::lapack::getrs(const matrix::Dense &A, vector &B, #pragma omp target data use_device_ptr(Ad, ipivd, Bd, devinfod) { - internal::check_CUDA(cusolverDnDgetrs(h, CUBLAS_OP_N, M, K, Ad, N, ipivd, + auto cublas_trans = CUBLAS_OP_N; + if (trans == 'T') { + cublas_trans = CUBLAS_OP_T; + } + internal::check_CUDA(cusolverDnDgetrs(h, cublas_trans, M, K, Ad, N, ipivd, Bd, M, devinfod)); } diff --git a/src/internal/lapack/getrs/dense_float_getrs.cpp b/src/internal/lapack/getrs/dense_float_getrs.cpp index f18dc1f96..d4fa8ef92 100644 --- a/src/internal/lapack/getrs/dense_float_getrs.cpp +++ b/src/internal/lapack/getrs/dense_float_getrs.cpp @@ -26,7 +26,7 @@ int internal::lapack::getrs(const matrix::Dense &A, vector &B, const float *Ad = A.data(); float *Bd = B.data(); const int *ipivd = ipiv.data(); - const char trans = 'N'; + const char trans = 'T'; if (A.get_device_mem_stat() == true && B.get_device_mem_stat() == true) { #if MONOLISH_USE_NVIDIA_GPU @@ -42,7 +42,11 @@ int internal::lapack::getrs(const matrix::Dense &A, vector &B, #pragma omp target data use_device_ptr(Ad, ipivd, Bd, devinfod) { - internal::check_CUDA(cusolverDnSgetrs(h, CUBLAS_OP_N, M, K, Ad, N, ipivd, + auto cublas_trans = CUBLAS_OP_N; + if (trans == 'T') { + cublas_trans = CUBLAS_OP_T; + } + internal::check_CUDA(cusolverDnSgetrs(h, cublas_trans, M, K, Ad, N, ipivd, Bd, M, devinfod)); } diff --git a/test/equation/dense_lu/Makefile b/test/equation/dense_lu/Makefile index 5b6acb53c..ff0b5cc28 100644 --- a/test/equation/dense_lu/Makefile +++ b/test/equation/dense_lu/Makefile @@ -16,15 +16,19 @@ sxat: run_cpu: ./$(FUNC)_cpu.out ../../test.mtx 1 + ./$(FUNC)_cpu.out ../../test2.mtx 1 run_gpu: $(PROFILER)./$(FUNC)_gpu.out ../../test.mtx 1 + $(PROFILER)./$(FUNC)_gpu.out ../../test2.mtx 1 run_a64fx: $(PROFILER)./$(FUNC)_a64fx.out ../../test.mtx 1 + $(PROFILER)./$(FUNC)_a64fx.out ../../test2.mtx 1 run_sxat: $(PROFILER)./$(FUNC)_sxat.out ../../test.mtx 1 + $(PROFILER)./$(FUNC)_sxat.out ../../test2.mtx 1 clean: - rm *.out diff --git a/test/equation/sparse_ic/Makefile b/test/equation/sparse_ic/Makefile index 061d95e3d..47d4a7f50 100644 --- a/test/equation/sparse_ic/Makefile +++ b/test/equation/sparse_ic/Makefile @@ -16,15 +16,19 @@ sxat: run_cpu: ./$(FUNC)_cpu.out ../../test.mtx 1 + ./$(FUNC)_cpu.out ../../test2.mtx 1 run_gpu: $(PROFILER)./$(FUNC)_gpu.out ../../test.mtx 1 + $(PROFILER)./$(FUNC)_gpu.out ../../test2.mtx 1 run_a64fx: $(PROFILER)./$(FUNC)_a64fx.out ../../test.mtx 1 + $(PROFILER)./$(FUNC)_a64fx.out ../../test2.mtx 1 run_sxat: $(PROFILER)./$(FUNC)_sxat.out ../../test.mtx 1 + $(PROFILER)./$(FUNC)_sxat.out ../../test2.mtx 1 clean: - rm *.out diff --git a/test/equation/sparse_ilu/Makefile b/test/equation/sparse_ilu/Makefile index 3a28743d5..9589b1ac3 100644 --- a/test/equation/sparse_ilu/Makefile +++ b/test/equation/sparse_ilu/Makefile @@ -16,15 +16,19 @@ sxat: run_cpu: ./$(FUNC)_cpu.out ../../test.mtx 1 + ./$(FUNC)_cpu.out ../../test2.mtx 1 run_gpu: $(PROFILER)./$(FUNC)_gpu.out ../../test.mtx 1 + $(PROFILER)./$(FUNC)_gpu.out ../../test2.mtx 1 run_a64fx: $(PROFILER)./$(FUNC)_a64fx.out ../../test.mtx 1 + $(PROFILER)./$(FUNC)_a64fx.out ../../test2.mtx 1 run_sxat: $(PROFILER)./$(FUNC)_sxat.out ../../test.mtx 1 + $(PROFILER)./$(FUNC)_sxat.out ../../test2.mtx 1 clean: - rm *.out diff --git a/test/equation/sparse_qr/Makefile b/test/equation/sparse_qr/Makefile index 8fd4b8624..de766fbe2 100644 --- a/test/equation/sparse_qr/Makefile +++ b/test/equation/sparse_qr/Makefile @@ -16,15 +16,19 @@ sxat: run_cpu: ./$(FUNC)_cpu.out ../../test.mtx 1 + ./$(FUNC)_cpu.out ../../test2.mtx 1 run_gpu: $(PROFILER)./$(FUNC)_gpu.out ../../test.mtx 1 + $(PROFILER)./$(FUNC)_gpu.out ../../test2.mtx 1 run_a64fx: $(PROFILER)./$(FUNC)_a64fx.out ../../test.mtx 1 + $(PROFILER)./$(FUNC)_a64fx.out ../../test2.mtx 1 run_sxat: $(PROFILER)./$(FUNC)_sxat.out ../../test.mtx 1 + $(PROFILER)./$(FUNC)_sxat.out ../../test2.mtx 1 clean: - rm *.out diff --git a/test/test2.mtx b/test/test2.mtx new file mode 100755 index 000000000..dbd8f0453 --- /dev/null +++ b/test/test2.mtx @@ -0,0 +1,9 @@ +%%MatrixMarket matrix coordinate real general +3 3 7 +1 1 2 +1 2 1 +2 1 -1 +2 2 2 +2 3 -1 +3 2 1 +3 3 2 diff --git a/test/test_utils.hpp b/test/test_utils.hpp index 5a2710f71..1e3938583 100644 --- a/test/test_utils.hpp +++ b/test/test_utils.hpp @@ -31,8 +31,8 @@ bool ans_check(const std::string &func, double result, double ans, double tol) { } if (err < tol) { - std::cout << func << "(" << get_type() << ")" << std::flush; - std::cout << ": pass" << std::endl; + // std::cout << func << "(" << get_type() << ")" << std::flush; + // std::cout << ": pass" << std::endl; return true; } else { std::cout << "Error!!" << std::endl; @@ -104,8 +104,8 @@ bool ans_check(const std::string &func, const T *result, const T *ans, int size, } if (check) { - std::cout << func << "(" << get_type() << ")" << std::flush; - std::cout << ": pass" << std::endl; + // std::cout << func << "(" << get_type() << ")" << std::flush; + // std::cout << ": pass" << std::endl; return check; } else { std::cout << "Error!!" << std::endl; @@ -144,9 +144,9 @@ bool ans_check(const std::string &func, const std::string &type, } if (check) { - std::cout << func << "(" << get_type() << "," << type << ")" - << std::flush; - std::cout << ": pass" << std::endl; + // std::cout << func << "(" << get_type() << "," << type << ")" + // << std::flush; + // std::cout << ": pass" << std::endl; return check; } else { std::cout << "Error!!" << std::endl;