Skip to content

Commit

Permalink
Use BLAS3 routines as much as possible
Browse files Browse the repository at this point in the history
  • Loading branch information
amontoison committed Sep 15, 2023
1 parent f7fc22b commit 6aa9372
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 43 deletions.
86 changes: 50 additions & 36 deletions src/block_krylov_processes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,11 @@ function householder(A::AbstractMatrix{FC}) where FC <: FloatOrComplex
return Q, R

Check warning on line 91 in src/block_krylov_processes.jl

View check run for this annotation

Codecov / codecov/patch

src/block_krylov_processes.jl#L78-L91

Added lines #L78 - L91 were not covered by tests
end

function reduced_qr(V::AbstractMatrix{FC}, algo::String) where FC <: FloatOrComplex
function reduced_qr(A::AbstractMatrix{FC}, algo::String) where FC <: FloatOrComplex
if algo == "gs"
Q, R = gs(V)
Q, R = gs(A)
elseif algo == "mgs"
Q, R = mgs(V)
Q, R = mgs(A)
else
error("$algo is not a supported method to perform a reduced QR.")

Check warning on line 100 in src/block_krylov_processes.jl

View check run for this annotation

Codecov / codecov/patch

src/block_krylov_processes.jl#L100

Added line #L100 was not covered by tests
end
Expand All @@ -119,11 +119,12 @@ end
function hermitian_lanczos(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs") where FC <: FloatOrComplex
m, n = size(A)
t, p = size(B)
R = real(FC)

V = zeros(FC, n, (k+1)*p)
T = spzeros(FC, (k+1)*p, k*p)

α = -one(FC)
β = one(FC)
q = zeros(FC, n, p)
Ωᵢ = zeros(FC, p, p)

Expand All @@ -141,11 +142,11 @@ function hermitian_lanczos(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs")
vᵢ₋₁ = view(V,:,pos1-p:pos2-p)
Ψᵢ = T[pos1:pos2,pos1-p:pos2-p]
T[pos1-p:pos2-p,pos1:pos2] .= Ψᵢ'
q = q - vᵢ₋₁ * Ψᵢ'
mul!(q, vᵢ₋₁, Ψᵢ', α, β) # q = q - vᵢ₋₁ * Ψᵢᴴ
end
mul!(Ωᵢ, vᵢ', q) # Ωᵢ = vᵢᵀq
mul!(Ωᵢ, vᵢ', q) # Ωᵢ = vᵢᴴ * q
mul!(q, vᵢ, Ωᵢ, α, β) # q = q - vᵢ * Ωᵢᴴ
T[pos1:pos2,pos1:pos2] .= Ωᵢ
q = q - vᵢ * Ωᵢ
Q, Ψᵢ₊₁ = reduced_qr(q, algo)
vᵢ₊₁ .= Q
T[pos1+p:pos2+p,pos1:pos2] .= Ψᵢ₊₁
Expand Down Expand Up @@ -174,16 +175,19 @@ function nonhermitian_lanczos(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k
m, n = size(A)
t, p = size(B)
Aᴴ = A'
pivot = NoPivot()
pivoting = Val{false}()

V = zeros(FC, n, (k+1)*p)
U = zeros(FC, n, (k+1)*p)
T = zeros(FC, (k+1)*p, k*p)
Tᴴ = zeros(FC, (k+1)*p, k*p)

α = -one(FC)
β = one(FC)
qv = zeros(FC, n, p)
qu = zeros(FC, n, p)
Ωᵢ = zeros(FC, p, p)
D = zeros(FC, p, p)

for i = 1:k
pos1 = (i-1)*p + 1
Expand All @@ -195,8 +199,8 @@ function nonhermitian_lanczos(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k
uᵢ = view(U,:,pos1:pos2)
uᵢ₊₁ = view(U,:,pos3:pos4)
if i == 1
D = C' * B
F = lu(D, pivot)
mul!(D, C', B) # D = Cᴴ * B
F = lu(D, pivoting)
Φᵢ, Ψᵢ = F.L, F.U
# Φᵢ = F.P' * Φᵢ
vᵢ .= (Ψᵢ' \ B')'
Expand All @@ -211,18 +215,18 @@ function nonhermitian_lanczos(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k
uᵢ₋₁ = view(U,:,pos5:pos6)
Φᵢ = Tᴴ[pos1:pos2,pos5:pos6]'
T[pos5:pos6,pos1:pos2] = Φᵢ
qv = qv - vᵢ₋₁ * Φᵢ
mul!(qv, vᵢ₋₁, Φᵢ, α, β) # qv = qv - vᵢ₋₁ * Φᵢ
TΨᵢ = T[pos1:pos2,pos5:pos6]'
Tᴴ[pos5:pos6,pos1:pos2] = TΨᵢ
qu = qu - uᵢ₋₁ * TΨᵢ
mul!(qu, uᵢ₋₁, TΨᵢ, α, β) # qu = qu - uᵢ₋₁ * Ψᵢᴴ
end
mul!(Ωᵢ, uᵢ', qv)
T[pos1:pos2,pos1:pos2] .= Ωᵢ
Tᴴ[pos1:pos2,pos1:pos2] .= Ωᵢ'
qv = qv - vᵢ * Ωᵢ
qu = qu - uᵢ * Ωᵢ'
D = qu' * qv
F = lu(D, pivot)
mul!(qv, vᵢ, Ωᵢ, α, β) # qv = qv - vᵢ * Ωᵢ
mul!(qu, uᵢ, Ωᵢ', α, β) # qu = qu - uᵢ * Ωᵢᴴ
mul!(D, qu', qv) # D = quᴴ * qv
F = lu(D, pivoting)
Φᵢ₊₁, Ψᵢ₊₁ = F.L, F.U
# Φᵢ₊₁ = F.P' * Φᵢ₊₁
vᵢ₊₁ .= (Ψᵢ₊₁' \ qv')'
Expand Down Expand Up @@ -257,7 +261,11 @@ function arnoldi(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs", reorthogo

V = zeros(FC, n, (k+1)*p)
H = zeros(FC, (k+1)*p, k*p)

α = -one(FC)
β = one(FC)
q = zeros(FC, n, p)
Ψᵢⱼ = Ψtmp = zeros(FC, p, p)

for j = 1:k
pos1 = (j-1)*p + 1
Expand All @@ -273,17 +281,17 @@ function arnoldi(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs", reorthogo
pos3 = (i-1)*p + 1
pos4 = i*p
vᵢ = view(V,:,pos3:pos4)
Ψᵢⱼ = vᵢ' * q
mul!(Ψᵢⱼ, vᵢ', q) # Ψᵢⱼ = vᵢᴴ * q
mul!(q, vᵢ, Ψᵢⱼ, α, β) # q = q - vᵢ * Ψᵢⱼ
H[pos3:pos4,pos1:pos2] .= Ψᵢⱼ
q = q - vᵢ * Ψᵢⱼ
end
if reorthogonalization
for i = 1:j
pos3 = (i-1)*p + 1
pos4 = i*p
vᵢ = view(V,:,pos3:pos4)
Ψtmp = vᵢ' * q
q = q - vᵢ * Ψtmp
mul!(Ψtmp, vᵢ', q) # Ψtmp = vᵢᴴ * q
mul!(q, vᵢ, Ψtmp, α, β) # q = q - vᵢ * Ψtmp
H[pos3:pos4,pos1:pos2] .+= Ψtmp
end
end
Expand Down Expand Up @@ -313,13 +321,14 @@ end
function golub_kahan(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs") where FC <: FloatOrComplex
m, n = size(A)
t, p = size(B)
R = real(FC)
Aᴴ = A'

V = zeros(FC, n, (k+1)*p)
U = zeros(FC, m, (k+1)*p)
L = spzeros(FC, (k+1)*p, (k+1)*p)

α = -one(FC)
β = one(FC)
qv = zeros(FC, n, p)
qu = zeros(FC, m, p)

Expand All @@ -340,14 +349,14 @@ function golub_kahan(A, B::AbstractMatrix{FC}, k::Int; algo::String="mgs") where
vᵢ .= Qv
L[pos1:pos2,pos1:pos2] .= TΩᵢ'
end
mul!(qu, A, vᵢ)
Ωᵢ = L[pos1:pos2,pos1:pos2]
qu = qu - uᵢ * Ωᵢ
mul!(qu, A, vᵢ)
mul!(qu, uᵢ, Ωᵢ, α, β) # qu = qu - uᵢ * Ωᵢ
Qu, Ψᵢ₊₁ = reduced_qr(qu, algo)
uᵢ₊₁ .= Qu
L[pos3:pos4,pos1:pos2] .= Ψᵢ₊₁
mul!(qv, Aᴴ, uᵢ₊₁)
qv = qv - vᵢ * Ψᵢ₊₁'
mul!(qv, vᵢ, Ψᵢ₊₁', α, β) # qv = qv - vᵢ * Ψᵢ₊₁
Qv, TΩᵢ₊₁ = reduced_qr(qv, algo)
vᵢ₊₁ .= Qv
L[pos3:pos4,pos3:pos4] .= TΩᵢ₊₁'
Expand Down Expand Up @@ -382,6 +391,8 @@ function saunders_simon_yip(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k::
T = zeros(FC, (k+1)*p, k*p)
Tᴴ = zeros(FC, (k+1)*p, k*p)

α = -one(FC)
β = one(FC)
qv = zeros(FC, m, p)
qu = zeros(FC, n, p)
Ωᵢ = zeros(FC, p, p)
Expand Down Expand Up @@ -410,16 +421,16 @@ function saunders_simon_yip(A, B::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k::
uᵢ₋₁ = view(U,:,pos5:pos6)
Φᵢ = Tᴴ[pos1:pos2,pos5:pos6]'
T[pos5:pos6,pos1:pos2] = Φᵢ
qv = qv - vᵢ₋₁ * Φᵢ
mul!(qv, vᵢ₋₁, Φᵢ, α, β) # qv = qv - vᵢ₋₁ * Φᵢ
TΨᵢ = T[pos1:pos2,pos5:pos6]'
Tᴴ[pos5:pos6,pos1:pos2] = TΨᵢ
qu = qu - uᵢ₋₁ * TΨᵢ
mul!(qu, uᵢ₋₁, TΨᵢ, α, β) # qu = qu - uᵢ₋₁ * Ψᵢᴴ
end
mul!(Ωᵢ, vᵢ', qv)
T[pos1:pos2,pos1:pos2] .= Ωᵢ
Tᴴ[pos1:pos2,pos1:pos2] .= Ωᵢ'
qv = qv - vᵢ * Ωᵢ
qu = qu - uᵢ * Ωᵢ'
mul!(qv, vᵢ, Ωᵢ, α, β) # qv = qv - vᵢ * Ωᵢ
mul!(qu, uᵢ, Ωᵢ', α, β) # qu = qu - uᵢ * Ωᵢᴴ
Qv, Ψᵢ₊₁ = reduced_qr(qv, algo)
vᵢ₊₁ .= Qv
T[pos3:pos4,pos1:pos2] .= Ψᵢ₊₁
Expand Down Expand Up @@ -461,8 +472,11 @@ function montoison_orban(A, B, D::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k::
H = zeros(FC, (k+1)*p, k*p)
F = zeros(FC, (k+1)*p, k*p)

α = -one(FC)
β = one(FC)
qv = zeros(FC, m, p)
qu = zeros(FC, n, p)
Ψᵢⱼ = Φᵢⱼ = Ψtmp = Φtmp = zeros(FC, p, p)

for j = 1:k
pos1 = (j-1)*p + 1
Expand All @@ -484,24 +498,24 @@ function montoison_orban(A, B, D::AbstractMatrix{FC}, C::AbstractMatrix{FC}, k::
pos4 = i*p
vᵢ = view(V,:,pos3:pos4)
uᵢ = view(U,:,pos3:pos4)
Ψᵢⱼ = vᵢ' * qv
mul!(Ψᵢⱼ, vᵢ', qv) # Ψᵢⱼ = vᵢᴴ * qv
mul!(qv, vᵢ, Ψᵢⱼ, α, β) # qv = qv - vᵢ * Ψᵢⱼ
H[pos3:pos4,pos1:pos2] .= Ψᵢⱼ
qv = qv - vᵢ * Ψᵢⱼ
Φᵢⱼ = uᵢ' * qu
mul!(Φᵢⱼ, uᵢ', qu) # Φᵢⱼ = uᵢᴴ * qu
mul!(qu, uᵢ, Φᵢⱼ, α, β) # qu = qu - uᵢ * Φᵢⱼ
F[pos3:pos4,pos1:pos2] .= Φᵢⱼ
qu = qu - uᵢ * Φᵢⱼ
end
if reorthogonalization
for i = 1:j
pos3 = (i-1)*p + 1
pos4 = i*p
vᵢ = view(V,:,pos3:pos4)
uᵢ = view(U,:,pos3:pos4)
Ψtmp = vᵢ' * qv
qv = qv - vᵢ * Ψtmp
mul!(Ψtmp, vᵢ', qv) # Ψtmp = vᵢᴴ * qv
mul!(qv, vᵢ, Ψtmp, α, β) # qv = qv - vᵢ * Ψtmp
H[pos3:pos4,pos1:pos2] .+= Ψtmp
Φtmp = uᵢ' * qu
qu = qu - uᵢ * Φtmp
mul!(Φtmp, uᵢ', qu) # Φtmp = uᵢᴴ * qu
mul!(qu, uᵢ, Φtmp, α, β) # qu = qu - uᵢ * Φtmp
F[pos3:pos4,pos1:pos2] .+= Φtmp
end
end
Expand Down
14 changes: 7 additions & 7 deletions test/test_processes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ end
A = A' * A
B = rand(FC, n, p)

@testset "$algo" for algo in ("gs", "mgs")
@testset "algo = $algo" for algo in ("gs", "mgs")
V, T = hermitian_lanczos(A, B, k; algo)

@test norm(V[:,1:p*s]' * V[:,1:p*s] - I) 1e-4
Expand Down Expand Up @@ -118,8 +118,8 @@ end
A = rand(FC, n, n)
B = rand(FC, n, p)

@testset "$algo" for algo in ("gs", "mgs")
@testset "$reorthogonalization" for reorthogonalization in (false, true)
@testset "algo = $algo" for algo in ("gs", "mgs")
@testset "reorthogonalization = $reorthogonalization" for reorthogonalization in (false, true)
V, H = arnoldi(A, B, k; algo, reorthogonalization)

@test norm(V[:,1:p*s]' * V[:,1:p*s] - I) 1e-4
Expand Down Expand Up @@ -158,7 +158,7 @@ end
A = rand(FC, m, n)
B = rand(FC, m, p)

@testset "$algo" for algo in ("gs", "mgs")
@testset "algo = $algo" for algo in ("gs", "mgs")
V, U, L = golub_kahan(A, B, k; algo)
BL = L[1:(k+1)*p,1:k*p]

Expand Down Expand Up @@ -208,7 +208,7 @@ end
B = rand(FC, m, p)
C = rand(FC, n, p)

@testset "$algo" for algo in ("gs", "mgs")
@testset "algo = $algo" for algo in ("gs", "mgs")
V, T, U, Tᴴ = saunders_simon_yip(A, B, C, k; algo)

@test norm(V[:,1:p*s]' * V[:,1:p*s] - I) 1e-4
Expand Down Expand Up @@ -262,8 +262,8 @@ end
D = rand(FC, m, p)
C = rand(FC, n, p)

@testset "$algo" for algo in ("gs", "mgs")
@testset "$reorthogonalization" for reorthogonalization in (false, true)
@testset "algo = $algo" for algo in ("gs", "mgs")
@testset "reorthogonalization = $reorthogonalization" for reorthogonalization in (false, true)
V, H, U, F = montoison_orban(A, B, D, C, k; algo, reorthogonalization)

@test norm(V[:,1:p*s]' * V[:,1:p*s] - I) 1e-4
Expand Down

0 comments on commit 6aa9372

Please sign in to comment.