diff --git a/docs/src/gpu.md b/docs/src/gpu.md
index 5664c1d8f..cf5c80003 100644
--- a/docs/src/gpu.md
+++ b/docs/src/gpu.md
@@ -52,7 +52,7 @@ if CUDA.functional()
 end
 ```
 
-If you use a Krylov method that only requires `A * v` products (see @factorization-free), the most efficient format is `CuSparseMatrixCSR`.
+If you use a Krylov method that only requires `A * v` products (see [here](@ref factorization-free)), the most efficient format is `CuSparseMatrixCSR`.
 Optimized operator-vector products that exploit GPU features can also be used by means of linear operators.
 
 Preconditioners, especially incomplete Cholesky or incomplete LU factorizations that involve triangular solves,
diff --git a/test/gpu/nvidia.jl b/test/gpu/nvidia.jl
index 1e202be34..f8f3af1f9 100644
--- a/test/gpu/nvidia.jl
+++ b/test/gpu/nvidia.jl
@@ -21,9 +21,18 @@ include("gpu.jl")
   A_coo_gpu = CuSparseMatrixCOO(A_cpu)
   b_gpu = CuVector(b_cpu)
 
-  x_csc, stats_csc = lslq(A_csc_gpu, b_gpu, verbose=1)
-  x_csr, stats_csr = lsqr(A_csr_gpu, b_gpu, verbose=1)
-  x_coo, stats_coo = lsmr(A_coo_gpu, b_gpu, verbose=1)
+  x_csc, stats_csc = lslq(A_csc_gpu, b_gpu)
+  Aᴴr_csc = A_csc_gpu' * (b_gpu - A_csc_gpu * x_csc)
+  println("case 1")
+  println(norm(Aᴴr_csc))
+  x_csr, stats_csr = lsqr(A_csr_gpu, b_gpu)
+  Aᴴr_csr = A_csr_gpu' * (b_gpu - A_csr_gpu * x_csr)
+  println("case 2")
+  println(norm(Aᴴr_csr))
+  x_coo, stats_coo = lsmr(A_coo_gpu, b_gpu)
+  Aᴴr_coo = A_coo_gpu' * (b_gpu - A_coo_gpu * x_coo)
+  println("case 3")
+  println(norm(Aᴴr_coo))
 
   @testset "ic0" begin
     A_cpu, b_cpu = sparse_laplacian()
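
For reference, here is a minimal standalone sketch of the workflow the updated test exercises: build a least-squares problem on the CPU, move it to the GPU in the `CuSparseMatrixCSR` format recommended by the documentation change, solve with LSQR, and inspect the optimality residual `Aᴴr`. It assumes CUDA.jl (with its `CUDA.CUSPARSE` submodule) and Krylov.jl are installed; the problem size, density, and data are illustrative, not taken from the test suite.

```julia
using CUDA, CUDA.CUSPARSE
using Krylov, LinearAlgebra, SparseArrays

if CUDA.functional()
  # Build a small rectangular least-squares problem on the CPU.
  # Dimensions and density are illustrative only.
  m, n = 200, 100
  A_cpu = sprand(m, n, 0.1)
  b_cpu = rand(m)

  # CSR is the most efficient CUSPARSE format when the solver only
  # needs A * v (and Aᴴ * u) products.
  A_gpu = CuSparseMatrixCSR(A_cpu)
  b_gpu = CuVector(b_cpu)

  # Solve min ‖b - Ax‖₂ with LSQR; at a least-squares solution Aᴴr ≈ 0,
  # so the printed norm should be small at convergence.
  x, stats = lsqr(A_gpu, b_gpu)
  r = b_gpu - A_gpu * x
  println(norm(A_gpu' * r))
end
```

The `CuSparseMatrixCSC` and `CuSparseMatrixCOO` variants follow the same pattern, as the updated test above shows.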