diff --git a/docs/src/processes.md b/docs/src/processes.md index 2fb14a281..0df9313c6 100644 --- a/docs/src/processes.md +++ b/docs/src/processes.md @@ -424,5 +424,42 @@ Related methods: [`GPMR`](@ref gpmr). It also coincides with the Saunders-Simon-Yip process when $B = A^H$. ```@docs -montoison_orban +montoison_orban(::Any, ::Any, ::AbstractVector{FC}, ::AbstractVector{FC}, ::Int) where FC <: (Union{Complex{T}, T} where T <: AbstractFloat) +``` + +If the vectors $b$ and $c$ are replaced by matrices $D$ and $C$ with both $p$ columns, we can derive the block Montoison-Orban process. + +![block_montoison_orban](./graphics/block_montoison_orban.png) + +After $k$ iterations of the block Montoison-Orban process, the situation may be summarized as +```math +\begin{align*} + A U_k &= V_{k+1} H_{k+1,k}, \\ + B V_k &= U_{k+1} F_{k+1,k}, \\ + V_k^H V_k &= U_k^H U_k = I_{pk}, +\end{align*} +``` +where $\begin{bmatrix} V_k & 0 \\ 0 & U_k \end{bmatrix}$ is an orthonormal basis of the block Krylov subspace $\mathcal{K}^{\square}_k \left(\begin{bmatrix} 0 & A \\ B & 0 \end{bmatrix}, \begin{bmatrix} D & 0 \\ 0 & C \end{bmatrix}\right)$, +```math +H_{k+1,k} = +\begin{bmatrix} + \Psi_{1,1}~ & \Psi_{1,2}~ & \ldots & \Psi_{1,k} \\ + \Psi_{2,1}~ & \ddots~ & \ddots & \vdots \\ + & \ddots~ & \ddots & \Psi_{k-1,k} \\ + & & \Psi_{k,k-1} & \Psi_{k,k} \\ + & & & \Psi_{k+1,k} +\end{bmatrix} +, \qquad +F_{k+1,k} = +\begin{bmatrix} + \Phi_{1,1}~ & \Phi_{1,2}~ & \ldots & \Phi_{1,k} \\ + \Phi_{2,1}~ & \ddots~ & \ddots & \vdots \\ + & \ddots~ & \ddots & \Phi_{k-1,k} \\ + & & \Phi_{k,k-1} & \Phi_{k,k} \\ + & & & \Phi_{k+1,k} +\end{bmatrix}. +``` + +```@docs +montoison_orban(::Any, ::Any, ::AbstractMatrix{FC}, ::AbstractMatrix{FC}, ::Int) where FC <: (Union{Complex{T}, T} where T <: AbstractFloat) ``` diff --git a/src/Krylov.jl b/src/Krylov.jl index 05069d14b..56ca58f38 100644 --- a/src/Krylov.jl +++ b/src/Krylov.jl @@ -5,7 +5,9 @@ using LinearAlgebra, SparseArrays, Printf include("krylov_utils.jl") include("krylov_stats.jl") include("krylov_solvers.jl") + include("krylov_processes.jl") +include("block_krylov_processes.jl") include("cg.jl") include("cr.jl") diff --git a/src/block_krylov_processes.jl b/src/block_krylov_processes.jl new file mode 100644 index 000000000..ba11aa784 --- /dev/null +++ b/src/block_krylov_processes.jl @@ -0,0 +1,268 @@ +# """ +# # Q an n-by-k orthogonal matrix: QᵀQ = I +# # R an k-by-k upper triangular matrix: QR = A +# """ +function mgs(V::AbstractMatrix{FC}) where FC <: FloatOrComplex + k = size(V, 2) + Q = copy(V) + R = zeros(FC, k, k) + for j = 1:k + p = Q[:,j] + R[j,j] = sqrt(p' * p) + Q[:,j] = p / R[j,j] + for i = j+1:k + R[j,i] = Q[:,j]' * Q[:,i] + Q[:,i] = Q[:,i] - R[j,i] * Q[:,j] + end + end + return Q, R +end + +""" + V, T = hermitian_lanczos(A, B, k) + +#### Input arguments + +* `A`: a linear operator that models an Hermitian matrix of dimension n; +* `B`: a matrix of size n × p; +* `k`: the number of iterations of the block Hermitian Lanczos process. + +#### Output arguments + +* `V`: a dense n × p(k+1) matrix; +* `T`: a sparse p(k+1) × pk block tridiagonal matrix with a bandwidth p. +""" +function hermitian_lanczos(A, B::AbstractMatrix{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + t, p = size(B) + R = real(FC) + + V = zeros(FC, n, (k+1)*p) + T = spzeros(FC, (k+1)*p, k*p) + + for i = 1:k + pos1 = (i-1)*p + 1 + pos2 = i*p + vᵢ = view(V,:,pos1:pos2) + vᵢ₊₁ = view(V,:,pos1+p:pos2+p) + if i == 1 + Q, Ψᵢ = mgs(B) + vᵢ .= Q + end + aux = A * vᵢ + if i ≥ 2 + vᵢ₋₁ = view(V,:,pos1-p:pos2-p) + Ψᵢ = T[pos1:pos2,pos1-p:pos2-p] + T[pos1-p:pos2-p,pos1:pos2] .= Ψᵢ' + aux = aux - vᵢ₋₁ * Ψᵢ' + end + Ωᵢ = vᵢ' * aux + T[pos1:pos2,pos1:pos2] .= Ωᵢ + aux = aux - vᵢ * Ωᵢ + Q, Ψᵢ₊₁ = mgs(aux) + vᵢ₊₁ .= Q + T[pos1+p:pos2+p,pos1:pos2] .= Ψᵢ₊₁ + end + return V, T +end + + +""" + V, H = arnoldi(A, B, k; reorthogonalization=false) + +#### Input arguments + +* `A`: a linear operator that models a square matrix of dimension n; +* `B`: a matrix of size n × p; +* `k`: the number of iterations of the block Arnoldi process. + +#### Keyword arguments + +* `reorthogonalization`: reorthogonalize the new matrices of the block Krylov basis against all previous matrices. + +#### Output arguments + +* `V`: a dense n × p(k+1) matrix; +* `H`: a dense p(k+1) × pk block upper Hessenberg matrix with a lower bandwidth p. +""" +function arnoldi(A, B::AbstractMatrix{FC}, k::Int; reorthogonalization::Bool=false) where FC <: FloatOrComplex + m, n = size(A) + t, p = size(B) + + V = zeros(FC, n, (k+1)*p) + H = zeros(FC, (k+1)*p, k*p) + + for j = 1:k + pos1 = (j-1)*p + 1 + pos2 = j*p + vⱼ = view(V,:,pos1:pos2) + vⱼ₊₁ = view(V,:,pos1+p:pos2+p) + if j == 1 + Q, Γ = mgs(B) + vⱼ .= Q + end + q = A * vⱼ + for i = 1:j + pos3 = (i-1)*p + 1 + pos4 = i*p + vᵢ = view(V,:,pos3:pos4) + Ψᵢⱼ = vᵢ' * q + H[pos3:pos4,pos1:pos2] .= Ψᵢⱼ + q = q - vᵢ * Ψᵢⱼ + end + if reorthogonalization + for i = 1:j + pos3 = (i-1)*p + 1 + pos4 = i*p + vᵢ = view(V,:,pos3:pos4) + Ψtmp = vᵢ' * q + q = q - vᵢ * Ψtmp + H[pos3:pos4,pos1:pos2] .+= Ψtmp + end + end + Q, Ψ = mgs(q) + H[pos1+p:pos2+p,pos1:pos2] .= Ψ + vⱼ₊₁ .= Q + end + return V, H +end + + +""" + V, U, L = golub_kahan(A, B, k) + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `B`: a matrix of size m × p; +* `k`: the number of iterations of the block Golub-Kahan process. + +#### Output arguments + +* `V`: a dense n × p(k+1) matrix; +* `U`: a dense m × p(k+1) matrix; +* `L`: a sparse p(k+1) × p(k+1) block lower bidiagonal matrix with a lower bandwidth p. +""" +function golub_kahan(A, B::AbstractMatrix{FC}, k::Int) where FC <: FloatOrComplex + m, n = size(A) + t, p = size(B) + R = real(FC) + Aᴴ = A' + + V = zeros(FC, n, (k+1)*p) + U = zeros(FC, m, (k+1)*p) + L = spzeros(FC, (k+1)*p, (k+1)*p) + + for i = 1:k + pos1 = (i-1)*p + 1 + pos2 = i*p + pos3 = pos1 + p + pos4 = pos2 + p + vᵢ = view(V,:,pos1:pos2) + vᵢ₊₁ = view(V,:,pos1+p:pos2+p) + uᵢ = view(U,:,pos1:pos2) + uᵢ₊₁ = view(U,:,pos1+p:pos2+p) + if i == 1 + Qu, Ψᵢ = mgs(B) + uᵢ .= Qu + q = Aᴴ * uᵢ + Qv, TΩᵢ = mgs(q) + vᵢ .= Qv + L[pos1:pos2,pos1:pos2] .= TΩᵢ' + end + aux1 = A * vᵢ + Ωᵢ = L[pos1:pos2,pos1:pos2] + aux1 = aux1 - uᵢ * Ωᵢ + Qu, Ψᵢ₊₁ = mgs(aux1) + uᵢ₊₁ .= Qu + L[pos3:pos4,pos1:pos2] .= Ψᵢ₊₁ + aux2 = Aᴴ * uᵢ₊₁ + aux2 = aux2 - vᵢ * Ψᵢ₊₁' + Qv, TΩᵢ₊₁ = mgs(aux2) + vᵢ₊₁ .= Qv + L[pos3:pos4,pos3:pos4] .= TΩᵢ₊₁' + end + return V, U, L +end + +""" + V, H, U, F = montoison_orban(A, B, D, C, k; reorthogonalization=false) + +#### Input arguments + +* `A`: a linear operator that models a matrix of dimension m × n; +* `B`: a linear operator that models a matrix of dimension n × m; +* `D`: a matrix of size m × p; +* `C`: a matrix of size n × p; +* `k`: the number of iterations of the block Montoison-Orban process. + +#### Keyword arguments + +* `reorthogonalization`: reorthogonalize the new matrices of the block Krylov basis against all previous matrices. + +#### Output arguments + +* `V`: a dense m × p(k+1) matrix; +* `H`: a dense p(k+1) × pk block upper Hessenberg matrix with a lower bandwidth p; +* `U`: a dense n × p(k+1) matrix; +* `F`: a dense p(k+1) × pk block upper Hessenberg matrix with a lower bandwidth p. +""" +function montoison_orban(A, B, D::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k::Int; reorthogonalization::Bool=false) where FC <: FloatOrComplex + m, n = size(A) + t, p = size(D) + + V = zeros(FC, m, (k+1)*p) + U = zeros(FC, n, (k+1)*p) + H = zeros(FC, (k+1)*p, k*p) + F = zeros(FC, (k+1)*p, k*p) + + for j = 1:k + pos1 = (j-1)*p + 1 + pos2 = j*p + vⱼ = view(V,:,pos1:pos2) + vⱼ₊₁ = view(V,:,pos1+p:pos2+p) + uⱼ = view(U,:,pos1:pos2) + uⱼ₊₁ = view(U,:,pos1+p:pos2+p) + if j == 1 + Qv, Γ = mgs(D) + vⱼ .= Qv + Qu, Λ = mgs(C) + uⱼ .= Qu + end + qv = A * uⱼ + qu = B * vⱼ + for i = 1:j + pos3 = (i-1)*p + 1 + pos4 = i*p + vᵢ = view(V,:,pos3:pos4) + uᵢ = view(U,:,pos3:pos4) + Ψᵢⱼ = vᵢ' * qv + H[pos3:pos4,pos1:pos2] .= Ψᵢⱼ + qv = qv - vᵢ * Ψᵢⱼ + Φᵢⱼ = uᵢ' * qu + F[pos3:pos4,pos1:pos2] .= Φᵢⱼ + qu = qu - uᵢ * Φᵢⱼ + end + if reorthogonalization + for i = 1:j + pos3 = (i-1)*p + 1 + pos4 = i*p + vᵢ = view(V,:,pos3:pos4) + uᵢ = view(U,:,pos3:pos4) + Ψtmp = vᵢ' * qv + qv = qv - vᵢ * Ψtmp + H[pos3:pos4,pos1:pos2] .+= Ψtmp + Φtmp = uᵢ' * qu + qu = qu - uᵢ * Φtmp + F[pos3:pos4,pos1:pos2] .+= Φtmp + end + end + Qv, Ψ = mgs(qv) + H[pos1+p:pos2+p,pos1:pos2] .= Ψ + vⱼ₊₁ .= Qv + Qu, Φ = mgs(qu) + F[pos1+p:pos2+p,pos1:pos2] .= Φ + uⱼ₊₁ .= Qu + end + return V, H, U, F +end diff --git a/src/krylov_processes.jl b/src/krylov_processes.jl index adcc540c9..d482e04eb 100644 --- a/src/krylov_processes.jl +++ b/src/krylov_processes.jl @@ -1,25 +1,5 @@ export hermitian_lanczos, nonhermitian_lanczos, arnoldi, golub_kahan, saunders_simon_yip, montoison_orban -""" -# Q an n-by-k orthogonal matrix: QᵀQ = I -# R an k-by-k upper triangular matrix: QR = A -""" -function mgs(V::AbstractMatrix{FC}) where FC <: FloatOrComplex - k = size(V, 2) - Q = copy(V) - R = zeros(FC, k, k) - for j = 1:k - p = Q[:,j] - R[j,j] = sqrt(p' * p) - Q[:,j] = p / R[j,j] - for i = j+1:k - R[j,i] = Q[:,j]' * Q[:,i] - Q[:,i] = Q[:,i] - R[j,i] * Q[:,j] - end - end - return Q, R -end - """ V, T = hermitian_lanczos(A, b, k) @@ -90,54 +70,6 @@ function hermitian_lanczos(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOr return V, T end -""" - V, T = hermitian_lanczos(A, B, k) - -#### Input arguments - -* `A`: a linear operator that models an Hermitian matrix of dimension n; -* `B`: a matrix of size n × p; -* `k`: the number of iterations of the block Hermitian Lanczos process. - -#### Output arguments - -* `V`: a dense n × p(k+1) matrix; -* `T`: a sparse p(k+1) × pk block tridiagonal matrix with a bandwidth p. -""" -function hermitian_lanczos(A, B::AbstractMatrix{FC}, k::Int) where FC <: FloatOrComplex - m, n = size(A) - t, p = size(B) - R = real(FC) - - V = zeros(FC, n, (k+1)*p) - T = spzeros(FC, (k+1)*p, k*p) - - for i = 1:k - pos1 = (i-1)*p + 1 - pos2 = i*p - vᵢ = view(V,:,pos1:pos2) - vᵢ₊₁ = view(V,:,pos1+p:pos2+p) - if i == 1 - Q, Ψᵢ = mgs(B) - vᵢ .= Q - end - aux = A * vᵢ - if i ≥ 2 - vᵢ₋₁ = view(V,:,pos1-p:pos2-p) - Ψᵢ = T[pos1:pos2,pos1-p:pos2-p] - T[pos1-p:pos2-p,pos1:pos2] .= Ψᵢ' - aux = aux - vᵢ₋₁ * Ψᵢ' - end - Ωᵢ = vᵢ' * aux - T[pos1:pos2,pos1:pos2] .= Ωᵢ - aux = aux - vᵢ * Ωᵢ - Q, Ψᵢ₊₁ = mgs(aux) - vᵢ₊₁ .= Q - T[pos1+p:pos2+p,pos1:pos2] .= Ψᵢ₊₁ - end - return V, T -end - """ V, T, U, Tᴴ = nonhermitian_lanczos(A, b, c, k) @@ -289,52 +221,6 @@ function arnoldi(A, b::AbstractVector{FC}, k::Int; reorthogonalization::Bool=fal return V, H end -""" - V, H = arnoldi(A, B, k) - -#### Input arguments - -* `A`: a linear operator that models a square matrix of dimension n; -* `B`: a matrix of size n × p; -* `k`: the number of iterations of the block Arnoldi process. - -#### Output arguments - -* `V`: a dense n × p(k+1) matrix; -* `H`: a dense p(k+1) × pk block upper Hessenberg matrix with a lower bandwidth p. -""" -function arnoldi(A, B::AbstractMatrix{FC}, k::Int) where FC <: FloatOrComplex - m, n = size(A) - t, p = size(B) - - V = zeros(FC, n, (k+1)*p) - H = zeros(FC, (k+1)*p, k*p) - - for i = 1:k - pos1 = (i-1)*p + 1 - pos2 = i*p - vᵢ = view(V,:,pos1:pos2) - vᵢ₊₁ = view(V,:,pos1+p:pos2+p) - if i == 1 - Q, Γ = mgs(B) - vᵢ .= Q - end - q = A * vᵢ - for j = 1:i - pos3 = (j-1)*p + 1 - pos4 = j*p - vⱼ = view(V,:,pos3:pos4) - Ψⱼᵢ = vⱼ' * q - H[pos3:pos4,pos1:pos2] .= Ψⱼᵢ - q = q - vⱼ * Ψⱼᵢ - end - Q, Ψ = mgs(q) - H[pos1+p:pos2+p,pos1:pos2] .= Ψ - vᵢ₊₁ .= Q - end - return V, H -end - """ V, U, L = golub_kahan(A, b, k) @@ -411,63 +297,6 @@ function golub_kahan(A, b::AbstractVector{FC}, k::Int) where FC <: FloatOrComple return V, U, L end -""" - V, U, L = golub_kahan(A, B, k) - -#### Input arguments - -* `A`: a linear operator that models a matrix of dimension m × n; -* `B`: a matrix of size m × p; -* `k`: the number of iterations of the block Golub-Kahan process. - -#### Output arguments - -* `V`: a dense n × p(k+1) matrix; -* `U`: a dense m × p(k+1) matrix; -* `L`: a sparse p(k+1) × p(k+1) block lower bidiagonal matrix with a lower bandwidth p. -""" -function golub_kahan(A, B::AbstractMatrix{FC}, k::Int) where FC <: FloatOrComplex - m, n = size(A) - t, p = size(B) - R = real(FC) - Aᴴ = A' - - V = zeros(FC, n, (k+1)*p) - U = zeros(FC, m, (k+1)*p) - L = spzeros(FC, (k+1)*p, (k+1)*p) - - for i = 1:k - pos1 = (i-1)*p + 1 - pos2 = i*p - pos3 = pos1 + p - pos4 = pos2 + p - vᵢ = view(V,:,pos1:pos2) - vᵢ₊₁ = view(V,:,pos1+p:pos2+p) - uᵢ = view(U,:,pos1:pos2) - uᵢ₊₁ = view(U,:,pos1+p:pos2+p) - if i == 1 - Qu, Ψᵢ = mgs(B) - uᵢ .= Qu - q = Aᴴ * uᵢ - Qv, TΩᵢ = mgs(q) - vᵢ .= Qv - L[pos1:pos2,pos1:pos2] .= TΩᵢ' - end - aux1 = A * vᵢ - Ωᵢ = L[pos1:pos2,pos1:pos2] - aux1 = aux1 - uᵢ * Ωᵢ - Qu, Ψᵢ₊₁ = mgs(aux1) - uᵢ₊₁ .= Qu - L[pos3:pos4,pos1:pos2] .= Ψᵢ₊₁ - aux2 = Aᴴ * uᵢ₊₁ - aux2 = aux2 - vᵢ * Ψᵢ₊₁' - Qv, TΩᵢ₊₁ = mgs(aux2) - vᵢ₊₁ .= Qv - L[pos3:pos4,pos3:pos4] .= TΩᵢ₊₁' - end - return V, U, L -end - """ V, T, U, Tᴴ = saunders_simon_yip(A, b, c, k) diff --git a/test/test_processes.jl b/test/test_processes.jl index e4edecc7e..57b6c7df6 100644 --- a/test/test_processes.jl +++ b/test/test_processes.jl @@ -44,10 +44,7 @@ end @testset "Block Hermitian Lanczos" begin A, _ = symmetric_indefinite(n, FC=FC) - B = Matrix{FC}(I, n, p) - if FC == ComplexF64 - B .+= im - end + B = rand(FC, n, p) V, T = hermitian_lanczos(A, B, k) # @test A * V[:,1:p*k] ≈ V * T @@ -93,22 +90,21 @@ end end @testset "Block Arnoldi" begin - A, _ = nonsymmetric_indefinite(n, FC=FC) - B = Matrix{FC}(I, n, p) - if FC == ComplexF64 - B .+= im - end - V, H = arnoldi(A, B, k) + for reorthogonalization in (false, true) + A, _ = nonsymmetric_indefinite(n, FC=FC) + B = rand(FC, n, p) + V, H = arnoldi(A, B, k; reorthogonalization) - # @test A * V[:,1:k] ≈ V * H + # @test A * V[:,1:k] ≈ V * H - # function storage_block_arnoldi_bytes(n, p, k) - # return ... - # end + # function storage_block_arnoldi_bytes(n, p, k) + # return ... + # end - # expected_block_arnoldi_bytes = storage_block_arnoldi_bytes(n, p, k) - # actual_block_arnoldi_bytes = @allocated arnoldi(A, B, k) - # @test expected_block_arnoldi_bytes ≤ actual_block_arnoldi_bytes ≤ 1.02 * expected_block_arnoldi_bytes + # expected_block_arnoldi_bytes = storage_block_arnoldi_bytes(n, p, k) + # actual_block_arnoldi_bytes = @allocated arnoldi(A, B, k; reorthogonalization) + # @test expected_block_arnoldi_bytes ≤ actual_block_arnoldi_bytes ≤ 1.02 * expected_block_arnoldi_bytes + end end @testset "Golub-Kahan" begin @@ -130,10 +126,7 @@ end @testset "Block Golub-Kahan" begin A, _ = under_consistent(m, n, FC=FC) - B = Matrix{FC}(I, m, p) - if FC == ComplexF64 - B .+= im - end + B = rand(FC, m, p) V, U, L = golub_kahan(A, B, k) BL = L[1:(k+1)*p,1:k*p] @@ -203,6 +196,29 @@ end @test expected_montoison_orban_bytes ≤ actual_montoison_orban_bytes ≤ 1.02 * expected_montoison_orban_bytes end end + + @testset "Block Montoison-Orban" begin + for reorthogonalization in (false, true) + A, _ = under_consistent(m, n, FC=FC) + B, _ = over_consistent(n, m, FC=FC) + D = rand(FC, m, p) + C = rand(FC, n, p) + V, H, U, F = montoison_orban(A, B, D, C, k; reorthogonalization) + + @test A * U[:,1:k*p] ≈ V * H + @test B * V[:,1:k*p] ≈ U * F + @test B * A * U[:,1:(k-1)*p] ≈ U * F * H[1:k*p,1:(k-1)*p] + @test A * B * V[:,1:(k-1)*p] ≈ V * H * F[1:k*p,1:(k-1)*p] + + function storage_block_montoison_orban_bytes(m, n, p, k) + return 2*k*(k+1) * nbits_FC + (n+m)*(k+1) * nbits_FC + end + + expected_block_montoison_orban_bytes = storage_block_montoison_orban_bytes(m, n, p, k) + actual_block_montoison_orban_bytes = @allocated montoison_orban(A, B, b, c, k; reorthogonalization) + @test expected_block_montoison_orban_bytes ≤ actual_block_montoison_orban_bytes ≤ 1.02 * expected_block_montoison_orban_bytes + end + end end end end