Skip to content

Commit

Permalink
Edits to enable GPU runs with gpmd
Browse files Browse the repository at this point in the history
Co-Authored-By: cnegre <christianfannegre@gmail.com>

o Address runtime errorsr associated with hamiltonian/overlap build
o Canonical response speedup
  • Loading branch information
mewall committed Jul 20, 2023
1 parent 2d673b9 commit e6a7075
Show file tree
Hide file tree
Showing 3 changed files with 184 additions and 74 deletions.
51 changes: 35 additions & 16 deletions src/latte_mods/ham_latte_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -169,22 +169,33 @@ subroutine get_hsmat(ham_bml,over_bml,coordinate,lattice_vector,spindex,&
! if(bml_get_N(ham_bml).LE.0) then !Carefull we need to clean S and H before rebuilding them!!!
call bml_deallocate(ham_bml)
call bml_deallocate(over_bml)
call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml, &
call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml, &
bml_dmode)
call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,mdim,over_bml, &
call bml_zero_matrix(bml_type,bml_element_real,dp,norb,mdim,over_bml, &
bml_dmode)
!call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,mdim,ham_bml, &
! bml_dmode)
!call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,mdim,over_bml, &
! bml_dmode)
! endif

maxnorbi = maxval(norbi)

if(.not.allocated(ham)) then
allocate(ham(norb,norb))
endif
if(.not.allocated(over)) then
allocate(over(norb,norb))
endif

if (.not.allocated(block)) then
allocate(block(maxnorbi,maxnorbi,nats))
endif

!$omp parallel do default(none) &
!$omp private(ra,rb,dimi,dimj,ii,jj,j) &
!$omp shared(nats,coordinate,hindex,spindex, intPairsS,intPairsH,threshold,lattice_vector,norbi,onsitesH,onsitesS,ham_bml,over_bml) &
!$omp shared(block)
!$omp shared(block,ham,over)
do i = 1, nats
ra(:) = coordinate(:,i)
dimi = hindex(2,i)-hindex(1,i)+1
Expand All @@ -198,11 +209,11 @@ subroutine get_hsmat(ham_bml,over_bml,coordinate,lattice_vector,spindex,&

do jj=1,dimj
do ii=1,dimi
! ham(hindex(1,i)-1+ii,hindex(1,j)-1+jj) = block(ii,jj)
if(abs(block(ii,jj,i)).gt.threshold)then
call bml_set_element_new(ham_bml,hindex(1,i)-1+ii,&
hindex(1,j)-1+jj,block(ii,jj,i))
endif
ham(hindex(1,i)-1+ii,hindex(1,j)-1+jj) = block(ii,jj,i)
! if(abs(block(ii,jj,i)).gt.threshold)then
! call bml_set_element_new(ham_bml,hindex(1,i)-1+ii,&
! hindex(1,j)-1+jj,block(ii,jj,i))
!endif
enddo
enddo

Expand All @@ -212,11 +223,11 @@ subroutine get_hsmat(ham_bml,over_bml,coordinate,lattice_vector,spindex,&

do jj=1,dimj
do ii=1,dimi
! over(hindex(1,i)-1+ii,hindex(1,j)-1+jj) = block(ii,jj)
if(abs(block(ii,jj,i)).gt.threshold)then
call bml_set_element_new(over_bml,hindex(1,i)-1+ii,&
hindex(1,j)-1+jj,block(ii,jj,i))
endif
over(hindex(1,i)-1+ii,hindex(1,j)-1+jj) = block(ii,jj,i)
!if(abs(block(ii,jj,i)).gt.threshold)then
! call bml_set_element_new(over_bml,hindex(1,i)-1+ii,&
! hindex(1,j)-1+jj,block(ii,jj,i))
!endif
enddo
enddo

Expand All @@ -226,11 +237,19 @@ subroutine get_hsmat(ham_bml,over_bml,coordinate,lattice_vector,spindex,&

enddo
enddo

!$omp end parallel do

! bml_type=bml_get_type(ham_bml) !Get the bml type
! call bml_import_from_dense(bml_type,over,over_bml,threshold,norb) !Dense to dense_bml
! call bml_import_from_dense(bml_type,ham,ham_bml,threshold,norb) !Dense to dense_bml
bml_type=bml_get_type(ham_bml) !Get the bml type
call bml_import_from_dense(bml_type,over,over_bml,threshold,norb) !Dense to dense_bml
call bml_import_from_dense(bml_type,ham,ham_bml,threshold,norb) !Dense to dense_bml

if(allocated(ham)) then
deallocate(ham)
endif
if(allocated(over)) then
deallocate(over)
endif

! call bml_print_matrix("ham_bml",ham_bml,0,6,0,6)
! call bml_print_matrix("over_bml",over_bml,0,6,0,6)
Expand Down
154 changes: 114 additions & 40 deletions src/latte_mods/hsderivative_latte_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ subroutine get_dH(dx,coords,hindex,spindex,intPairsH,onsitesH,symbol,lattice_vec
real(dp) :: Rax_m(3), Rax_p(3), Ray_m(3), Ray_p(3)
real(dp) :: Raz_m(3), Raz_p(3), Rb(3), d, maxblockij
real(dp), allocatable :: Rx(:), Ry(:), Rz(:), blockm(:,:,:)
real(dp), allocatable :: blockp(:,:,:), dh0(:,:)
real(dp), allocatable :: blockp(:,:,:), dh0(:,:), dH0x(:,:), dH0y(:,:), dH0z(:,:)
real(dp), intent(in) :: coords(:,:), dx, lattice_vectors(:,:), onsitesH(:,:)
real(dp), intent(in) :: threshold
type(bml_matrix_t), intent(inout) :: dH0x_bml, dH0y_bml, dH0z_bml
Expand All @@ -64,20 +64,40 @@ subroutine get_dH(dx,coords,hindex,spindex,intPairsH,onsitesH,symbol,lattice_vec
nats = size(coords,dim=2)

if(bml_get_N(dH0x_bml).LT.0)then
call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dH0x_bml)
call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dH0y_bml)
call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dH0z_bml)
call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,dH0x_bml)
call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,dH0y_bml)
call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,dH0z_bml)
!call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dH0x_bml)
!call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dH0y_bml)
!call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dH0z_bml)
else
call bml_deallocate(dH0x_bml)
call bml_deallocate(dH0y_bml)
call bml_deallocate(dH0z_bml)
call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dH0x_bml)
call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dH0y_bml)
call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dH0z_bml)
call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,dH0x_bml)
call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,dH0y_bml)
call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,dH0z_bml)
!call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dH0x_bml)
!call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dH0y_bml)
!call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dH0z_bml)
endif

! dH0x = zeros(HDIM,HDIM); dH0y = zeros(HDIM,HDIM); dH0z = zeros(HDIM,HDIM);

if (.not.allocated(dH0x)) then
allocate(dH0x(norb,norb))
endif
if (.not.allocated(dH0y)) then
allocate(dH0y(norb,norb))
endif
if (.not.allocated(dH0z)) then
allocate(dH0z(norb,norb))
endif

dH0x = 0.0_dp
dH0y = 0.0_dp
dH0z = 0.0_dp

allocate(Rx(nats))
allocate(Ry(nats))
allocate(Rz(nats))
Expand All @@ -101,7 +121,7 @@ subroutine get_dH(dx,coords,hindex,spindex,intPairsH,onsitesH,symbol,lattice_vec
!$omp private(dimi,J,Type_pair,dimj,Rb,maxblockij) &
!$omp shared(nats,RX,RY,RZ,spindex,hindex,lattice_vectors, dx, threshold) &
!$omp shared(norbi,intPairsH,onsitesH,symbol,dH0x_bml,dH0y_bml,dH0z_bml) &
!$omp shared(blockm, blockp)
!$omp shared(blockm, blockp, dH0x, dH0y, dH0z)
do I = 1, nats

Type_pair(1) = symbol(i);
Expand Down Expand Up @@ -149,10 +169,11 @@ subroutine get_dH(dx,coords,hindex,spindex,intPairsH,onsitesH,symbol,lattice_vec

do jj=1,dimj
do ii=1,dimi
if(abs(blockp(ii,jj,i)).gt.threshold)then
call bml_set_element_new(dH0x_bml,hindex(1,i)-1+ii,&
hindex(1,j)-1+jj,blockp(ii,jj,i))
endif
dH0x(hindex(1,i)-1+ii,hindex(1,j)-1+jj) = blockp(ii,jj,i)
!if(abs(blockp(ii,jj,i)).gt.threshold)then
! call bml_set_element_new(dH0x_bml,hindex(1,i)-1+ii,&
! hindex(1,j)-1+jj,blockp(ii,jj,i))
!endif
enddo
enddo

Expand All @@ -174,10 +195,11 @@ subroutine get_dH(dx,coords,hindex,spindex,intPairsH,onsitesH,symbol,lattice_vec

do jj=1,dimj
do ii=1,dimi
if(abs(blockp(ii,jj,i)).gt.threshold)then
call bml_set_element_new(dH0y_bml,hindex(1,i)-1+ii,&
hindex(1,j)-1+jj,blockp(ii,jj,i))
endif
dH0y(hindex(1,i)-1+ii,hindex(1,j)-1+jj) = blockp(ii,jj,i)
!if(abs(blockp(ii,jj,i)).gt.threshold)then
! call bml_set_element_new(dH0y_bml,hindex(1,i)-1+ii,&
! hindex(1,j)-1+jj,blockp(ii,jj,i))
!endif
enddo
enddo

Expand All @@ -199,10 +221,11 @@ subroutine get_dH(dx,coords,hindex,spindex,intPairsH,onsitesH,symbol,lattice_vec

do jj=1,dimj
do ii=1,dimi
if(abs(blockp(ii,jj,i)).gt.threshold)then
call bml_set_element_new(dH0z_bml,hindex(1,i)-1+ii,&
hindex(1,j)-1+jj,blockp(ii,jj,i))
endif
dH0z(hindex(1,i)-1+ii,hindex(1,j)-1+jj) = blockp(ii,jj,i)
!if(abs(blockp(ii,jj,i)).gt.threshold)then
! call bml_set_element_new(dH0z_bml,hindex(1,i)-1+ii,&
! hindex(1,j)-1+jj,blockp(ii,jj,i))
!endif
enddo
enddo

Expand All @@ -211,6 +234,20 @@ subroutine get_dH(dx,coords,hindex,spindex,intPairsH,onsitesH,symbol,lattice_vec
enddo
enddo
!$omp end parallel do
!bml_type=bml_get_type(dH0x_bml) !Get the bml type
call bml_import_from_dense(bml_type,dH0x,dH0x_bml,threshold,norb) !Dense to dense_bml
call bml_import_from_dense(bml_type,dH0y,dH0y_bml,threshold,norb) !Dense to dense_bml
call bml_import_from_dense(bml_type,dH0z,dH0z_bml,threshold,norb) !Dense to dense_bml

if (allocated(dH0x)) then
deallocate(dH0x)
endif
if (allocated(dH0y)) then
deallocate(dH0y)
endif
if (allocated(dH0z)) then
deallocate(dH0z)
endif

! stop
end subroutine get_dH
Expand Down Expand Up @@ -245,7 +282,7 @@ subroutine get_dS(dx,coords,hindex,spindex,intPairsS,onsitesS,symbol,lattice_vec
real(dp) :: Rax_m(3), Rax_p(3), Ray_m(3), Ray_p(3)
real(dp) :: Raz_m(3), Raz_p(3), Rb(3), maxblockij
real(dp), allocatable :: Rx(:), Ry(:), Rz(:), blockm(:,:,:)
real(dp), allocatable :: blockp(:,:,:), dh0(:,:)
real(dp), allocatable :: blockp(:,:,:), dh0(:,:),dSx(:,:),dSy(:,:),dSz(:,:)
real(dp), intent(in) :: coords(:,:), dx, lattice_vectors(:,:), onsitesS(:,:)
real(dp), intent(in) :: threshold
type(bml_matrix_t), intent(inout) :: dSx_bml, dSy_bml, dSz_bml
Expand All @@ -256,22 +293,42 @@ subroutine get_dS(dx,coords,hindex,spindex,intPairsS,onsitesS,symbol,lattice_vec
nats = size(coords,dim=2)

if(bml_get_N(dSx_bml).LT.0)then
call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dSx_bml)
call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dSy_bml)
call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dSz_bml)
call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,dSx_bml)
call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,dSy_bml)
call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,dSz_bml)
!call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dSx_bml)
!call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dSy_bml)
!call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dSz_bml)
else
call bml_deallocate(dSx_bml)
call bml_deallocate(dSy_bml)
call bml_deallocate(dSz_bml)
call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dSx_bml)
call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dSy_bml)
call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dSz_bml)
call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,dSx_bml)
call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,dSy_bml)
call bml_zero_matrix(bml_type,bml_element_real,dp,norb,norb,dSz_bml)
!call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dSx_bml)
!call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dSy_bml)
!call bml_noinit_matrix(bml_type,bml_element_real,dp,norb,norb,dSz_bml)
endif

if (.not.allocated(dSx)) then
allocate(dSx(norb,norb))
endif
if (.not.allocated(dSy)) then
allocate(dSy(norb,norb))
endif
if (.not.allocated(dSz)) then
allocate(dSz(norb,norb))
endif

allocate(Rx(nats))
allocate(Ry(nats))
allocate(Rz(nats))

dSx = 0.0_dp
dSy = 0.0_dp
dSz = 0.0_dp

Rx = coords(1,:)
Ry = coords(2,:)
Rz = coords(3,:)
Expand All @@ -291,7 +348,7 @@ subroutine get_dS(dx,coords,hindex,spindex,intPairsS,onsitesS,symbol,lattice_vec
!$omp private(dimi,J,Type_pair,dimj,Rb,maxblockij) &
!$omp shared(nats,RX,RY,RZ,spindex,hindex,lattice_vectors, dx, threshold) &
!$omp shared(norbi,intPairsS,onsitesS,symbol,dSx_bml,dSy_bml,dSz_bml) &
!$omp shared(blockm,blockp)
!$omp shared(blockm,blockp,dSx,dSy,dSz)
do I = 1, nats
Type_pair(1) = symbol(i);
Rax_p(1) = RX(I)+ dx; Rax_p(2) = RY(I); Rax_p(3) = RZ(I)
Expand Down Expand Up @@ -329,10 +386,11 @@ subroutine get_dS(dx,coords,hindex,spindex,intPairsS,onsitesS,symbol,lattice_vec

do jj=1,dimj
do ii=1,dimi
if(abs(blockp(ii,jj,i)).gt.threshold)then
call bml_set_element_new(dSx_bml,hindex(1,i)-1+ii,&
hindex(1,j)-1+jj,blockp(ii,jj,i))
endif
dSx(hindex(1,i)-1+ii,hindex(1,j)-1+jj) = blockp(ii,jj,i)
!if(abs(blockp(ii,jj,i)).gt.threshold)then
! call bml_set_element_new(dSx_bml,hindex(1,i)-1+ii,&
! hindex(1,j)-1+jj,blockp(ii,jj,i))
!endif
enddo
enddo

Expand All @@ -348,10 +406,11 @@ subroutine get_dS(dx,coords,hindex,spindex,intPairsS,onsitesS,symbol,lattice_vec

do jj=1,dimj
do ii=1,dimi
if(abs(blockp(ii,jj,i)).gt.threshold)then
call bml_set_element_new(dSy_bml,hindex(1,i)-1+ii,&
hindex(1,j)-1+jj,blockp(ii,jj,i))
endif
dSy(hindex(1,i)-1+ii,hindex(1,j)-1+jj) = blockp(ii,jj,i)
!if(abs(blockp(ii,jj,i)).gt.threshold)then
! call bml_set_element_new(dSy_bml,hindex(1,i)-1+ii,&
! hindex(1,j)-1+jj,blockp(ii,jj,i))
!endif
enddo
enddo

Expand All @@ -367,10 +426,11 @@ subroutine get_dS(dx,coords,hindex,spindex,intPairsS,onsitesS,symbol,lattice_vec

do jj=1,dimj
do ii=1,dimi
if(abs(blockp(ii,jj,i)).gt.threshold)then
call bml_set_element_new(dSz_bml,hindex(1,i)-1+ii,&
hindex(1,j)-1+jj,blockp(ii,jj,i))
endif
dSz(hindex(1,i)-1+ii,hindex(1,j)-1+jj) = blockp(ii,jj,i)
!if(abs(blockp(ii,jj,i)).gt.threshold)then
! call bml_set_element_new(dSz_bml,hindex(1,i)-1+ii,&
! hindex(1,j)-1+jj,blockp(ii,jj,i))
!endif
enddo
enddo

Expand All @@ -379,6 +439,20 @@ subroutine get_dS(dx,coords,hindex,spindex,intPairsS,onsitesS,symbol,lattice_vec
enddo
enddo
!$omp end parallel do
!bml_type=bml_get_type(dSx_bml) !Get the bml type
call bml_import_from_dense(bml_type,dSx,dSx_bml,threshold,norb) !Dense to dense_bml
call bml_import_from_dense(bml_type,dSy,dSy_bml,threshold,norb) !Dense to dense_bml
call bml_import_from_dense(bml_type,dSz,dSz_bml,threshold,norb) !Dense to dense_bml

if (allocated(dSx)) then
deallocate(dSx)
endif
if (allocated(dSy)) then
deallocate(dSy)
endif
if (allocated(dSz)) then
deallocate(dSz)
endif

end subroutine get_dS

Expand Down
Loading

0 comments on commit e6a7075

Please sign in to comment.