From 09ac6eee0c3f9e41934cc07fdfcb14ae23c5353a Mon Sep 17 00:00:00 2001
From: Jean-Luc Fattebert <fattebertj@ornl.gov>
Date: Wed, 8 Nov 2023 10:41:37 -0500
Subject: [PATCH] Add dmconstruction3d benchmark

Pseudo atoms arranged in 3d lattice instead of 1d
---
 benchmarks/CMakeLists.txt                     |   2 +
 .../dmconstruction3d/dmconstruction3d.F90     | 146 ++++++++++++++++++
 benchmarks/dmconstruction3d/input.in.semicond |  15 ++
 benchmarks/dmconstruction3d/run.sh            |  31 ++++
 src/prg_modelham_mod.F90                      | 131 +++++++++++++++-
 5 files changed, 324 insertions(+), 1 deletion(-)
 create mode 100644 benchmarks/dmconstruction3d/dmconstruction3d.F90
 create mode 100644 benchmarks/dmconstruction3d/input.in.semicond
 create mode 100755 benchmarks/dmconstruction3d/run.sh

diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index 7ca7a2d1..1893702b 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -27,6 +27,8 @@ endfunction(progress_benchmark)
 
 progress_benchmark(dmconstruction dmconstruction/dmconstruction.F90)
 
+progress_benchmark(dmconstruction3d dmconstruction3d/dmconstruction3d.F90)
+
 progress_benchmark(dmconstruction_gp dmconstruction_graphBased/dmconstruction_graphBased.F90)
 
 progress_benchmark(dmconstruction_gpbio dmconstruction_graphBased/dmconstruction_graphBased_bio.F90 dmconstruction_graphBased/aux_mod.F90)
diff --git a/benchmarks/dmconstruction3d/dmconstruction3d.F90 b/benchmarks/dmconstruction3d/dmconstruction3d.F90
new file mode 100644
index 00000000..ff2b8b8f
--- /dev/null
+++ b/benchmarks/dmconstruction3d/dmconstruction3d.F90
@@ -0,0 +1,146 @@
+!> High-level program to construct a model Hamiltonian
+!!
+program hmodel
+
+  !BML lib.
+  use bml
+
+  !PROGRESS lib modes
+  use prg_modelham_mod
+  use prg_system_mod
+  use prg_densitymatrix_mod
+  use prg_dos_mod
+  use prg_sp2_mod
+  use prg_timer_mod
+  use prg_extras_mod
+  use prg_parallel_mod
+
+  implicit none
+  integer, parameter ::  dp = kind(1.0d0)
+  integer ::  norbs,seed,i,nreps
+  integer ::  verbose
+  character(20) :: filename,arg
+  type(bml_matrix_t) ::  ham_bml,rho_bml,rhos_bml,evects_bml,aux_bml
+  type(mham_type) ::  mham
+  type(system_type) ::  sys
+  real(dp) ::  threshold, bndfil, tol, n1d
+  real(dp), allocatable :: eigenvalues(:)
+  real(dp) :: ef,sparsity,dec,mlsi,mlsf,bnorm
+  character(20) :: bml_dmode
+
+  call prg_initParallel()
+
+  if (getNRanks().gt.1)then
+    if (printRank() .eq. 1)print*,'BML_DMODE_DISTRIBUTED'
+    bml_dmode = BML_DMODE_DISTRIBUTED
+  else
+    print*,'BML_DMODE_SEQUENTIAL'
+    bml_dmode = BML_DMODE_SEQUENTIAL
+  endif
+
+  !Parsing input file.
+  call getarg(1,filename)
+  call prg_parse_mham(mham,trim(adjustl(filename))) !Reads the input for modelham
+  nreps=1
+  if (iargc().gt.1)then
+    call getarg(2,arg)
+    read(arg,*)nreps
+  endif
+  !Number of orbitals/matrix size
+  norbs=mham%norbs
+
+  if (printRank() .eq. 1)print*,'norbs = ',norbs
+  n1d=norbs/2.
+  n1d=n1d**(1./3.)
+  if (printRank() .eq. 1)print*,'n1d = ',n1d
+  norbs=n1d**3
+  norbs=norbs*2
+  if (printRank() .eq. 1)print*,'Uses ',norbs,' orbitals'
+
+  !Allocating bml matrices
+  call bml_zero_matrix(mham%bml_type,bml_element_real,dp,norbs,norbs,ham_bml, &
+       &               bml_dmode)
+  call bml_zero_matrix(mham%bml_type,bml_element_real,dp,norbs,norbs,rho_bml, &
+       &               bml_dmode)
+  call bml_zero_matrix(mham%bml_type,bml_element_real,dp,norbs,norbs, &
+       &               evects_bml, bml_dmode)
+  call bml_zero_matrix(mham%bml_type,bml_element_real,dp,norbs,norbs,rhos_bml, &
+       &               bml_dmode)
+  call bml_zero_matrix(mham%bml_type,bml_element_real,dp,norbs,norbs,aux_bml, &
+       &               bml_dmode)
+
+  seed = 1000 !Seed to reproduce the Hamiltonian build
+  verbose = 1 !Verbosity level
+  threshold = 1.0d-5 !Threshold value for the matrices through the whole code
+  bndfil = 0.5d0 !Fraction of orbitals that will be filled
+
+  allocate(eigenvalues(norbs))
+
+  !Constructing the Hamiltonian
+  call prg_twolevel_model3d(mham%ea, mham%eb, mham%dab, mham%daiaj, mham%dbibj, &
+       &mham%dec, mham%rcoeff, mham%reshuffle, mham%seed, ham_bml, verbose)
+  call bml_threshold(ham_bml, threshold)
+  call bml_print_matrix("ham_bml",ham_bml,0,10,0,10)
+
+  sparsity = bml_get_sparsity(ham_bml, 1.0D-5)
+  if (printRank() .eq. 1)write(*,*)"Sparsity Ham=",sparsity
+
+  !Computing the density matrix with diagonalization
+  if (printRank() .eq. 1)print*,'prg_build_density_T0'
+  !run loop twice to have an accurate timing in 2nd call
+  do i =1, nreps
+    mlsi = mls()
+    call prg_build_density_T0(ham_bml, rho_bml, threshold, bndfil, eigenvalues)
+    mlsf = mls()
+    if (printRank() .eq. 1)write(*,*)"Time_for_prg_build_density_T0",mlsf-mlsi
+  enddo
+  print*,'Eigenvalues:'
+  do i = norbs/2-2, norbs/2+3
+    write(*,*)eigenvalues(i)
+  enddo
+
+  sparsity = bml_get_sparsity(rho_bml, 1.0D-5)
+  if (printRank() .eq. 1)write(*,*)"Sparsity Rho=",sparsity
+
+  !Getting the fermi level
+  ef = (eigenvalues(int(norbs/2)+1) + eigenvalues(int(norbs/2)))/2
+  eigenvalues = eigenvalues - ef
+
+  !Writting the total DOS
+  call prg_write_tdos(eigenvalues, 0.05d0, 10000, -20.0d0, 20.0d0, "tdos.dat")
+
+  tol = 2.0D-5*norbs*bndfil
+
+  !run loop twice to have an accurate timing in 2nd call
+  do i =1, nreps
+    !Solving for Rho using SP2
+    mlsi = mls()
+    call prg_sp2_alg1(ham_bml,rhos_bml,threshold,bndfil,15,100 &
+         ,"Rel",tol,20)
+    mlsf = mls()
+    if (printRank() .eq. 1)write(*,*)"Time_for_prg_sp2_alg1",mlsf-mlsi
+  enddo
+  call bml_print_matrix("rho_bml",rho_bml,0,10,0,10)
+  call bml_print_matrix("rhos_bml",rhos_bml,0,10,0,10)
+
+  call bml_copy(rhos_bml,aux_bml)
+  call bml_add(aux_bml,rho_bml,1.0d0,-1.0d0,threshold)
+  bnorm=bml_fnorm(aux_bml)
+  if (printRank() .eq. 1)write(*,*)"||DM_sp2-DM_diag||_F",bnorm
+
+  call bml_multiply(rhos_bml, rhos_bml, aux_bml, 0.5_dp, 0.0_dp, threshold)
+  call bml_print_matrix("rhos_bml^2",aux_bml,0,10,0,10)
+  call bml_add(aux_bml,rhos_bml,1.0d0,-1.0d0,threshold)
+  bnorm=bml_fnorm(aux_bml)
+  if (printRank() .eq. 1)write(*,*)"||DM_sp2-DM_sp2^2||_F",bnorm
+
+  call bml_multiply(ham_bml,rhos_bml,aux_bml,1.0_dp,0.0_dp,threshold)
+  call bml_multiply(rhos_bml,ham_bml,aux_bml,1.0_dp,-1.0_dp,threshold)
+  bnorm=bml_fnorm(aux_bml)
+  if (printRank() .eq. 1)write(*,*)"||DM_sp2*H-H*DM_sp2||_F",bnorm
+
+  !call bml_write_matrix(ham_bml, "hamiltonian.mtx")
+
+  call prg_shutdownParallel()
+
+end program hmodel
diff --git a/benchmarks/dmconstruction3d/input.in.semicond b/benchmarks/dmconstruction3d/input.in.semicond
new file mode 100644
index 00000000..5f8003a7
--- /dev/null
+++ b/benchmarks/dmconstruction3d/input.in.semicond
@@ -0,0 +1,15 @@
+MHAM{
+  NOrbs= size
+  BMLType= format
+  EpsilonA= -10.0
+  EpsilonB=  0.0
+  DeltaAB= -2.0
+  DeltaAiAj= -0.0
+  DeltaBiBj= -1.0
+  Decay= -1000.0
+  RCoeff= 0.0
+  Seed= 100
+  Reshuffle= F
+}
+
+
diff --git a/benchmarks/dmconstruction3d/run.sh b/benchmarks/dmconstruction3d/run.sh
new file mode 100755
index 00000000..3982823f
--- /dev/null
+++ b/benchmarks/dmconstruction3d/run.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+#source vars !Sourcing environmental variables
+
+export OMP_NUM_THREADS=8
+run=../../build/dmconstruction3d
+nreps=2
+
+device="myCPU" #Architecture name
+alg="sp2_alg1" #Name of the algorithm 
+tag="prg_sp2_alg1" #Tag for naming output files
+
+for format in Ellpack Dense
+do
+  for system in semicond
+  do
+    fileout="times_${system}_${alg}_${device}_${format}.dat"
+    rm $fileout
+    for i in 1024 2000 3456 8192 11664 16000
+    do
+      echo "Format, System, Size:" $format"," $system"," $i
+      sed 's/NOrbs=.*/NOrbs= '$i'/g' input.in.$system > tmp
+      sed 's/BMLType=.*/BMLType= '$format'/g' tmp > input.in
+      #jsrun -n1 -a1 -g1 -c21 -bpacked:21 ./main input.in $nreps > out$i$device$alg$format$system
+      $run  input.in $nreps > out$i$device$alg$format$system
+      time=`grep $tag out$i$device$alg$format$system | awk 'NF>1{print $NF}'`
+      echo $i $time  >> $fileout
+    done
+  done
+done
+
diff --git a/src/prg_modelham_mod.F90 b/src/prg_modelham_mod.F90
index 95f4c2b9..c85ac656 100644
--- a/src/prg_modelham_mod.F90
+++ b/src/prg_modelham_mod.F90
@@ -28,7 +28,7 @@ module prg_modelham_mod
     logical :: reshuffle
   end type mham_type
 
-  public :: prg_parse_mham, prg_twolevel_model
+  public :: prg_parse_mham, prg_twolevel_model, prg_twolevel_model3d
 
 contains
 
@@ -201,4 +201,133 @@ subroutine prg_twolevel_model(ea, eb, dab, daiaj, dbibj, dec, rcoeff, reshuffle,
 
   end subroutine prg_twolevel_model
 
+  !> Construct a two-level model Hamiltonian
+  !!
+  !! \param ea First onsite energy
+  !! \param eb Second onsite energy
+  !! \param dab Onsite Hamiltonian element
+  !! \param daiaj Intersite first level Hamiltonian elements
+  !! \param dbibj Intersite second level Hamiltonian elements
+  !! \param dec Decay constant
+  !! \param rcoeff Random coefficient
+  !! \param reshuffle If rows needs to be reshuffled
+  !! \param seed Random seed
+  !! \param h_bml Output hamiltonian matrix
+  !! \param verbose Verbosity level
+  subroutine prg_twolevel_model3d(ea, eb, dab, daiaj, dbibj, dec, rcoeff, reshuffle, &
+       & seed, h_bml, verbose)
+    real(dp), intent(in) :: ea, eb, dab, daiaj, dbibj, rcoeff
+    integer, intent(in) :: verbose, seed
+    integer, allocatable :: seedin(:)
+    logical, intent(in) :: reshuffle
+    type(bml_matrix_t),intent(inout) :: h_bml
+    real(dp), allocatable :: diagonal(:), row1(:), row2(:), rowi(:), rowj(:)
+    type(bml_matrix_t) :: ht_bml
+    integer :: norbs, i, j, ssize, i1, j1, k1, i2, j2, k2, n1d
+    real(dp) :: dec, dist, di, dj, dk, ran, tmp
+
+    norbs = bml_get_N(h_bml)
+
+    allocate(diagonal(norbs))
+    allocate(row1(norbs))
+    allocate(row2(norbs))
+
+    tmp = norbs
+    tmp = tmp*0.5
+    tmp = tmp**(1./3.)
+    n1d = int(tmp)
+    print*,'n1d = ',n1d
+
+    call random_seed()
+    call random_seed(size=ssize)
+    allocate(seedin(ssize))
+    seedin = seed
+    call random_seed(PUT=seedin)
+
+    do i=1,norbs
+      if(mod(i,2) == 0)then
+        call random_number(ran)
+        diagonal(i) = ea + rcoeff*(2.0_dp*ran - 1.0_dp)
+      else
+        call random_number(ran)
+        diagonal(i) = eb + rcoeff*(2.0_dp*ran - 1.0_dp)
+      endif
+    enddo
+
+    do i1=1,n1d
+      do j1=1,n1d
+        do k1=1,n1d
+          i = (i1-1) + n1d*(j1-1) + n1d*n1d*(k1-1)
+          do i2=1,n1d
+            do j2=1,n1d
+              do k2=1,n1d
+                j = (i2-1) + n1d*(j2-1) + n1d*n1d*(k2-1)
+                if(abs(real(i1-i2,dp)) <= n1d/2.0d0) then
+                  di = max(abs(real(i1-i2,dp))- 2.0_dp,0.0_dp)
+                else
+                  di = max((-abs(real(i1-i2,dp))+n1d)- 2.0_dp,0.0_dp)
+                endif
+                if(abs(real(j1-j2,dp)) <= n1d/2.0d0) then
+                  dj = max(abs(real(j1-j2,dp))- 2.0_dp,0.0_dp)
+                else
+                  dj = max((-abs(real(j1-j2,dp))+n1d)- 2.0_dp,0.0_dp)
+                endif
+                if(abs(real(k1-k2,dp)) <= n1d/2.0d0) then
+                  dk = max(abs(real(k1-k2,dp))- 2.0_dp,0.0_dp)
+                else
+                  dk = max((-abs(real(k1-k2,dp))+n1d)- 2.0_dp,0.0_dp)
+                endif
+                dist = dsqrt(di+di+dj*dj+dk*dk)
+
+                !assign matrix elements for all interactions between two atoms
+                !A-A type
+                call random_number(ran)
+                row1(2*j+1) = (daiaj + rcoeff*(2.0_dp*ran - 1.0_dp))*exp(dec*dist)
+                !A-B type
+                call random_number(ran)
+                row1(2*j+2) = (dab + rcoeff*(2.0_dp*ran - 1.0_dp))*exp(dec*dist)
+                row2(2*j+1) = (dab + rcoeff*(2.0_dp*ran - 1.0_dp))*exp(dec*dist)
+                !B-B type
+                call random_number(ran)
+                row2(2*j+2) = (dbibj + rcoeff*(2.0_dp*ran - 1.0_dp))*exp(dec*dist)
+                ! write(*,*)i,j,row(j),mod(i,2),mod(j,2),abs(real(i-j,dp)+norbs),abs(real(i-j,dp)+norbs)
+              enddo
+            enddo
+          enddo
+          call bml_set_row(h_bml,2*i+1,row1)
+          call bml_set_row(h_bml,2*i+2,row2)
+        enddo
+      enddo
+    enddo
+
+    call bml_set_diagonal(h_bml,diagonal)
+
+    !Symmetrization (necessary when random factor used)
+    call bml_copy_new(h_bml,ht_bml)
+    call bml_transpose(h_bml,ht_bml)
+    if(verbose.gt.0)then
+      call bml_print_matrix("h_bml",h_bml,0,10,0,10)
+      call bml_print_matrix("ht_bml",ht_bml,0,10,0,10)
+    endif
+    call bml_add(h_bml,ht_bml,0.5d0,0.5d0,0.0d0)
+
+    call bml_deallocate(ht_bml)
+
+    if(reshuffle)then
+      allocate(rowj(norbs))
+      allocate(rowi(norbs))
+      do i=1,norbs
+        call random_number(ran)
+        j = int(floor(ran*norbs+1))
+        call bml_get_row(h_bml,i,rowi)
+        call bml_get_row(h_bml,j,rowj)
+        call bml_set_row(h_bml,i,rowj)
+        call bml_set_row(h_bml,j,rowi)
+      enddo
+      deallocate(rowi)
+      deallocate(rowj)
+    endif
+
+  end subroutine prg_twolevel_model3d
+
 end module prg_modelham_mod