From 09ac6eee0c3f9e41934cc07fdfcb14ae23c5353a Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Wed, 8 Nov 2023 10:41:37 -0500 Subject: [PATCH] Add dmconstruction3d benchmark Pseudo atoms arranged in 3d lattice instead of 1d --- benchmarks/CMakeLists.txt | 2 + .../dmconstruction3d/dmconstruction3d.F90 | 146 ++++++++++++++++++ benchmarks/dmconstruction3d/input.in.semicond | 15 ++ benchmarks/dmconstruction3d/run.sh | 31 ++++ src/prg_modelham_mod.F90 | 131 +++++++++++++++- 5 files changed, 324 insertions(+), 1 deletion(-) create mode 100644 benchmarks/dmconstruction3d/dmconstruction3d.F90 create mode 100644 benchmarks/dmconstruction3d/input.in.semicond create mode 100755 benchmarks/dmconstruction3d/run.sh diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 7ca7a2d1..1893702b 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -27,6 +27,8 @@ endfunction(progress_benchmark) progress_benchmark(dmconstruction dmconstruction/dmconstruction.F90) +progress_benchmark(dmconstruction3d dmconstruction3d/dmconstruction3d.F90) + progress_benchmark(dmconstruction_gp dmconstruction_graphBased/dmconstruction_graphBased.F90) progress_benchmark(dmconstruction_gpbio dmconstruction_graphBased/dmconstruction_graphBased_bio.F90 dmconstruction_graphBased/aux_mod.F90) diff --git a/benchmarks/dmconstruction3d/dmconstruction3d.F90 b/benchmarks/dmconstruction3d/dmconstruction3d.F90 new file mode 100644 index 00000000..ff2b8b8f --- /dev/null +++ b/benchmarks/dmconstruction3d/dmconstruction3d.F90 @@ -0,0 +1,146 @@ +!> High-level program to construct a model Hamiltonian +!! +program hmodel + + !BML lib. + use bml + + !PROGRESS lib modes + use prg_modelham_mod + use prg_system_mod + use prg_densitymatrix_mod + use prg_dos_mod + use prg_sp2_mod + use prg_timer_mod + use prg_extras_mod + use prg_parallel_mod + + implicit none + integer, parameter :: dp = kind(1.0d0) + integer :: norbs,seed,i,nreps + integer :: verbose + character(20) :: filename,arg + type(bml_matrix_t) :: ham_bml,rho_bml,rhos_bml,evects_bml,aux_bml + type(mham_type) :: mham + type(system_type) :: sys + real(dp) :: threshold, bndfil, tol, n1d + real(dp), allocatable :: eigenvalues(:) + real(dp) :: ef,sparsity,dec,mlsi,mlsf,bnorm + character(20) :: bml_dmode + + call prg_initParallel() + + if (getNRanks().gt.1)then + if (printRank() .eq. 1)print*,'BML_DMODE_DISTRIBUTED' + bml_dmode = BML_DMODE_DISTRIBUTED + else + print*,'BML_DMODE_SEQUENTIAL' + bml_dmode = BML_DMODE_SEQUENTIAL + endif + + !Parsing input file. + call getarg(1,filename) + call prg_parse_mham(mham,trim(adjustl(filename))) !Reads the input for modelham + nreps=1 + if (iargc().gt.1)then + call getarg(2,arg) + read(arg,*)nreps + endif + !Number of orbitals/matrix size + norbs=mham%norbs + + if (printRank() .eq. 1)print*,'norbs = ',norbs + n1d=norbs/2. + n1d=n1d**(1./3.) + if (printRank() .eq. 1)print*,'n1d = ',n1d + norbs=n1d**3 + norbs=norbs*2 + if (printRank() .eq. 1)print*,'Uses ',norbs,' orbitals' + + !Allocating bml matrices + call bml_zero_matrix(mham%bml_type,bml_element_real,dp,norbs,norbs,ham_bml, & + & bml_dmode) + call bml_zero_matrix(mham%bml_type,bml_element_real,dp,norbs,norbs,rho_bml, & + & bml_dmode) + call bml_zero_matrix(mham%bml_type,bml_element_real,dp,norbs,norbs, & + & evects_bml, bml_dmode) + call bml_zero_matrix(mham%bml_type,bml_element_real,dp,norbs,norbs,rhos_bml, & + & bml_dmode) + call bml_zero_matrix(mham%bml_type,bml_element_real,dp,norbs,norbs,aux_bml, & + & bml_dmode) + + seed = 1000 !Seed to reproduce the Hamiltonian build + verbose = 1 !Verbosity level + threshold = 1.0d-5 !Threshold value for the matrices through the whole code + bndfil = 0.5d0 !Fraction of orbitals that will be filled + + allocate(eigenvalues(norbs)) + + !Constructing the Hamiltonian + call prg_twolevel_model3d(mham%ea, mham%eb, mham%dab, mham%daiaj, mham%dbibj, & + &mham%dec, mham%rcoeff, mham%reshuffle, mham%seed, ham_bml, verbose) + call bml_threshold(ham_bml, threshold) + call bml_print_matrix("ham_bml",ham_bml,0,10,0,10) + + sparsity = bml_get_sparsity(ham_bml, 1.0D-5) + if (printRank() .eq. 1)write(*,*)"Sparsity Ham=",sparsity + + !Computing the density matrix with diagonalization + if (printRank() .eq. 1)print*,'prg_build_density_T0' + !run loop twice to have an accurate timing in 2nd call + do i =1, nreps + mlsi = mls() + call prg_build_density_T0(ham_bml, rho_bml, threshold, bndfil, eigenvalues) + mlsf = mls() + if (printRank() .eq. 1)write(*,*)"Time_for_prg_build_density_T0",mlsf-mlsi + enddo + print*,'Eigenvalues:' + do i = norbs/2-2, norbs/2+3 + write(*,*)eigenvalues(i) + enddo + + sparsity = bml_get_sparsity(rho_bml, 1.0D-5) + if (printRank() .eq. 1)write(*,*)"Sparsity Rho=",sparsity + + !Getting the fermi level + ef = (eigenvalues(int(norbs/2)+1) + eigenvalues(int(norbs/2)))/2 + eigenvalues = eigenvalues - ef + + !Writting the total DOS + call prg_write_tdos(eigenvalues, 0.05d0, 10000, -20.0d0, 20.0d0, "tdos.dat") + + tol = 2.0D-5*norbs*bndfil + + !run loop twice to have an accurate timing in 2nd call + do i =1, nreps + !Solving for Rho using SP2 + mlsi = mls() + call prg_sp2_alg1(ham_bml,rhos_bml,threshold,bndfil,15,100 & + ,"Rel",tol,20) + mlsf = mls() + if (printRank() .eq. 1)write(*,*)"Time_for_prg_sp2_alg1",mlsf-mlsi + enddo + call bml_print_matrix("rho_bml",rho_bml,0,10,0,10) + call bml_print_matrix("rhos_bml",rhos_bml,0,10,0,10) + + call bml_copy(rhos_bml,aux_bml) + call bml_add(aux_bml,rho_bml,1.0d0,-1.0d0,threshold) + bnorm=bml_fnorm(aux_bml) + if (printRank() .eq. 1)write(*,*)"||DM_sp2-DM_diag||_F",bnorm + + call bml_multiply(rhos_bml, rhos_bml, aux_bml, 0.5_dp, 0.0_dp, threshold) + call bml_print_matrix("rhos_bml^2",aux_bml,0,10,0,10) + call bml_add(aux_bml,rhos_bml,1.0d0,-1.0d0,threshold) + bnorm=bml_fnorm(aux_bml) + if (printRank() .eq. 1)write(*,*)"||DM_sp2-DM_sp2^2||_F",bnorm + + call bml_multiply(ham_bml,rhos_bml,aux_bml,1.0_dp,0.0_dp,threshold) + call bml_multiply(rhos_bml,ham_bml,aux_bml,1.0_dp,-1.0_dp,threshold) + bnorm=bml_fnorm(aux_bml) + if (printRank() .eq. 1)write(*,*)"||DM_sp2*H-H*DM_sp2||_F",bnorm + + !call bml_write_matrix(ham_bml, "hamiltonian.mtx") + + call prg_shutdownParallel() + +end program hmodel diff --git a/benchmarks/dmconstruction3d/input.in.semicond b/benchmarks/dmconstruction3d/input.in.semicond new file mode 100644 index 00000000..5f8003a7 --- /dev/null +++ b/benchmarks/dmconstruction3d/input.in.semicond @@ -0,0 +1,15 @@ +MHAM{ + NOrbs= size + BMLType= format + EpsilonA= -10.0 + EpsilonB= 0.0 + DeltaAB= -2.0 + DeltaAiAj= -0.0 + DeltaBiBj= -1.0 + Decay= -1000.0 + RCoeff= 0.0 + Seed= 100 + Reshuffle= F +} + + diff --git a/benchmarks/dmconstruction3d/run.sh b/benchmarks/dmconstruction3d/run.sh new file mode 100755 index 00000000..3982823f --- /dev/null +++ b/benchmarks/dmconstruction3d/run.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +#source vars !Sourcing environmental variables + +export OMP_NUM_THREADS=8 +run=../../build/dmconstruction3d +nreps=2 + +device="myCPU" #Architecture name +alg="sp2_alg1" #Name of the algorithm +tag="prg_sp2_alg1" #Tag for naming output files + +for format in Ellpack Dense +do + for system in semicond + do + fileout="times_${system}_${alg}_${device}_${format}.dat" + rm $fileout + for i in 1024 2000 3456 8192 11664 16000 + do + echo "Format, System, Size:" $format"," $system"," $i + sed 's/NOrbs=.*/NOrbs= '$i'/g' input.in.$system > tmp + sed 's/BMLType=.*/BMLType= '$format'/g' tmp > input.in + #jsrun -n1 -a1 -g1 -c21 -bpacked:21 ./main input.in $nreps > out$i$device$alg$format$system + $run input.in $nreps > out$i$device$alg$format$system + time=`grep $tag out$i$device$alg$format$system | awk 'NF>1{print $NF}'` + echo $i $time >> $fileout + done + done +done + diff --git a/src/prg_modelham_mod.F90 b/src/prg_modelham_mod.F90 index 95f4c2b9..c85ac656 100644 --- a/src/prg_modelham_mod.F90 +++ b/src/prg_modelham_mod.F90 @@ -28,7 +28,7 @@ module prg_modelham_mod logical :: reshuffle end type mham_type - public :: prg_parse_mham, prg_twolevel_model + public :: prg_parse_mham, prg_twolevel_model, prg_twolevel_model3d contains @@ -201,4 +201,133 @@ subroutine prg_twolevel_model(ea, eb, dab, daiaj, dbibj, dec, rcoeff, reshuffle, end subroutine prg_twolevel_model + !> Construct a two-level model Hamiltonian + !! + !! \param ea First onsite energy + !! \param eb Second onsite energy + !! \param dab Onsite Hamiltonian element + !! \param daiaj Intersite first level Hamiltonian elements + !! \param dbibj Intersite second level Hamiltonian elements + !! \param dec Decay constant + !! \param rcoeff Random coefficient + !! \param reshuffle If rows needs to be reshuffled + !! \param seed Random seed + !! \param h_bml Output hamiltonian matrix + !! \param verbose Verbosity level + subroutine prg_twolevel_model3d(ea, eb, dab, daiaj, dbibj, dec, rcoeff, reshuffle, & + & seed, h_bml, verbose) + real(dp), intent(in) :: ea, eb, dab, daiaj, dbibj, rcoeff + integer, intent(in) :: verbose, seed + integer, allocatable :: seedin(:) + logical, intent(in) :: reshuffle + type(bml_matrix_t),intent(inout) :: h_bml + real(dp), allocatable :: diagonal(:), row1(:), row2(:), rowi(:), rowj(:) + type(bml_matrix_t) :: ht_bml + integer :: norbs, i, j, ssize, i1, j1, k1, i2, j2, k2, n1d + real(dp) :: dec, dist, di, dj, dk, ran, tmp + + norbs = bml_get_N(h_bml) + + allocate(diagonal(norbs)) + allocate(row1(norbs)) + allocate(row2(norbs)) + + tmp = norbs + tmp = tmp*0.5 + tmp = tmp**(1./3.) + n1d = int(tmp) + print*,'n1d = ',n1d + + call random_seed() + call random_seed(size=ssize) + allocate(seedin(ssize)) + seedin = seed + call random_seed(PUT=seedin) + + do i=1,norbs + if(mod(i,2) == 0)then + call random_number(ran) + diagonal(i) = ea + rcoeff*(2.0_dp*ran - 1.0_dp) + else + call random_number(ran) + diagonal(i) = eb + rcoeff*(2.0_dp*ran - 1.0_dp) + endif + enddo + + do i1=1,n1d + do j1=1,n1d + do k1=1,n1d + i = (i1-1) + n1d*(j1-1) + n1d*n1d*(k1-1) + do i2=1,n1d + do j2=1,n1d + do k2=1,n1d + j = (i2-1) + n1d*(j2-1) + n1d*n1d*(k2-1) + if(abs(real(i1-i2,dp)) <= n1d/2.0d0) then + di = max(abs(real(i1-i2,dp))- 2.0_dp,0.0_dp) + else + di = max((-abs(real(i1-i2,dp))+n1d)- 2.0_dp,0.0_dp) + endif + if(abs(real(j1-j2,dp)) <= n1d/2.0d0) then + dj = max(abs(real(j1-j2,dp))- 2.0_dp,0.0_dp) + else + dj = max((-abs(real(j1-j2,dp))+n1d)- 2.0_dp,0.0_dp) + endif + if(abs(real(k1-k2,dp)) <= n1d/2.0d0) then + dk = max(abs(real(k1-k2,dp))- 2.0_dp,0.0_dp) + else + dk = max((-abs(real(k1-k2,dp))+n1d)- 2.0_dp,0.0_dp) + endif + dist = dsqrt(di+di+dj*dj+dk*dk) + + !assign matrix elements for all interactions between two atoms + !A-A type + call random_number(ran) + row1(2*j+1) = (daiaj + rcoeff*(2.0_dp*ran - 1.0_dp))*exp(dec*dist) + !A-B type + call random_number(ran) + row1(2*j+2) = (dab + rcoeff*(2.0_dp*ran - 1.0_dp))*exp(dec*dist) + row2(2*j+1) = (dab + rcoeff*(2.0_dp*ran - 1.0_dp))*exp(dec*dist) + !B-B type + call random_number(ran) + row2(2*j+2) = (dbibj + rcoeff*(2.0_dp*ran - 1.0_dp))*exp(dec*dist) + ! write(*,*)i,j,row(j),mod(i,2),mod(j,2),abs(real(i-j,dp)+norbs),abs(real(i-j,dp)+norbs) + enddo + enddo + enddo + call bml_set_row(h_bml,2*i+1,row1) + call bml_set_row(h_bml,2*i+2,row2) + enddo + enddo + enddo + + call bml_set_diagonal(h_bml,diagonal) + + !Symmetrization (necessary when random factor used) + call bml_copy_new(h_bml,ht_bml) + call bml_transpose(h_bml,ht_bml) + if(verbose.gt.0)then + call bml_print_matrix("h_bml",h_bml,0,10,0,10) + call bml_print_matrix("ht_bml",ht_bml,0,10,0,10) + endif + call bml_add(h_bml,ht_bml,0.5d0,0.5d0,0.0d0) + + call bml_deallocate(ht_bml) + + if(reshuffle)then + allocate(rowj(norbs)) + allocate(rowi(norbs)) + do i=1,norbs + call random_number(ran) + j = int(floor(ran*norbs+1)) + call bml_get_row(h_bml,i,rowi) + call bml_get_row(h_bml,j,rowj) + call bml_set_row(h_bml,i,rowj) + call bml_set_row(h_bml,j,rowi) + enddo + deallocate(rowi) + deallocate(rowj) + endif + + end subroutine prg_twolevel_model3d + end module prg_modelham_mod