From 80fa61cf9d0371a67bf1f68064de504cba14730b Mon Sep 17 00:00:00 2001 From: Alexis Montoison Date: Thu, 7 Dec 2023 23:47:19 -0600 Subject: [PATCH] Add comments about using block-GMRES on GPUs --- docs/src/block_krylov.md | 23 +++++++++++++++++++++++ src/block_krylov_utils.jl | 14 -------------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/docs/src/block_krylov.md b/docs/src/block_krylov.md index 2db83c839..428b1ca35 100644 --- a/docs/src/block_krylov.md +++ b/docs/src/block_krylov.md @@ -1,5 +1,28 @@ ## Block-GMRES +!!! note + `block_gmres` works on GPUs + with Julia 1.11 or later. + +If you want to use `block_gmres` on GPUs with earlier Julia versions, you can overload the function `Krylov.copy_triangle` with the following code: +```julia +using KernelAbstractions, Krylov + +@kernel function copy_triangle_kernel!(dest, src) + i, j = @index(Global, NTuple) + if j >= i + @inbounds dest[i, j] = src[i, j] + end +end + +function Krylov.copy_triangle(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, k::Int) where FC <: Krylov.FloatOrComplex + backend = get_backend(Q) + ndrange = (k, k) + copy_triangle_kernel!(backend)(R, Q; ndrange=ndrange) + KernelAbstractions.synchronize(backend) +end +``` + ```@docs block_gmres block_gmres! 
diff --git a/src/block_krylov_utils.jl b/src/block_krylov_utils.jl index c09071fa4..68ba46458 100644 --- a/src/block_krylov_utils.jl +++ b/src/block_krylov_utils.jl @@ -156,20 +156,6 @@ function reduced_qr(A::AbstractMatrix{FC}, algo::String) where FC <: FloatOrComp return Q, R end -# @kernel function copy_triangle_kernel!(dest, src) -# i, j = @index(Global, NTuple) -# if j >= i -# @inbounds dest[i, j] = src[i, j] -# end -# end - -# function copy_triangle(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, k::Int) where FC <: FloatOrComplex -# backend = get_backend(Q) -# ndrange = (k, k) -# copy_triangle_kernel!(backend)(R, Q; ndrange=ndrange) -# KernelAbstractions.synchronize(backend) -# end - function copy_triangle(Q::AbstractMatrix{FC}, R::AbstractMatrix{FC}, k::Int) where FC <: FloatOrComplex if VERSION < v"1.11" for i = 1:k