Merge branch 'develop' into mp_reduction_fix_4767

QMCPACK · Oct 25, 2023 · 77e2b77 · 77e2b77
2 parents aceffb2 + 49d40c5
commit 77e2b77
Show file tree

Hide file tree

Showing 69 changed files with 2,284 additions and 1,498 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -368,32 +368,34 @@ if(alignment_remainder)
 endif()
 message(STATUS "QMC_SIMD_ALIGNMENT is set to ${QMC_SIMD_ALIGNMENT}")
 
-#---------------------------------------------------------
-# Determine if OpenMP taskloop works with the CXX compiler
-#---------------------------------------------------------
-include(TestOpenMPtaskloop)
-option(ENABLE_OMP_TASKLOOP "Enable OpenMP taskloop" ${OMP_TASKLOOP_OKAY})
-message(STATUS "ENABLE_OMP_TASKLOOP is set to ${ENABLE_OMP_TASKLOOP}")
-
-#---------------------------------------------------------
-# Set up OpenMP offload compile options
-#---------------------------------------------------------
-set(QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT OFF)
-if(ENABLE_OFFLOAD AND DEFINED OPENMP_OFFLOAD_COMPILE_OPTIONS)
-  message(STATUS "OpenMP offload CXX flags: ${OPENMP_OFFLOAD_COMPILE_OPTIONS}")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_OFFLOAD_COMPILE_OPTIONS}")
-  if(${COMPILER} MATCHES "Clang"
-     AND OPENMP_OFFLOAD_COMPILE_OPTIONS MATCHES "gfx"
-     AND QMC_CUDA2HIP)
-    # As of 11/2021, QMC_OFFLOAD_MEM_ASSOCIATED=ON is needed for AMD and mainline LLVM compilers
-    # when using OpenMP offload to AMD GPU together with HIP.
-    set(QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT ON)
+if(QMC_OMP)
+  #---------------------------------------------------------
+  # Determine if OpenMP taskloop works with the CXX compiler
+  #---------------------------------------------------------
+  include(TestOpenMPtaskloop)
+  option(ENABLE_OMP_TASKLOOP "Enable OpenMP taskloop" ${OMP_TASKLOOP_OKAY})
+  message(STATUS "ENABLE_OMP_TASKLOOP is set to ${ENABLE_OMP_TASKLOOP}")
+
+  #---------------------------------------------------------
+  # Set up OpenMP offload compile options
+  #---------------------------------------------------------
+  set(QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT OFF)
+  if(ENABLE_OFFLOAD AND DEFINED OPENMP_OFFLOAD_COMPILE_OPTIONS)
+    message(STATUS "OpenMP offload CXX flags: ${OPENMP_OFFLOAD_COMPILE_OPTIONS}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_OFFLOAD_COMPILE_OPTIONS}")
+    if(${COMPILER} MATCHES "Clang"
+	AND OPENMP_OFFLOAD_COMPILE_OPTIONS MATCHES "gfx"
+	AND QMC_CUDA2HIP)
+      # As of 11/2021, QMC_OFFLOAD_MEM_ASSOCIATED=ON is needed for AMD and mainline LLVM compilers
+      # when using OpenMP offload to AMD GPU together with HIP.
+      set(QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT ON)
+    endif()
   endif()
+  # Some OpenMP offload runtime libraries have composability issues with a vendor native runtime.
+  # A workaround is to make the vendor native runtime responsible for memory allocations and have OpenMP associate/disassociate them.
+  cmake_dependent_option(QMC_OFFLOAD_MEM_ASSOCIATED "Manage OpenMP memory allocations via the vendor runtime"
+    ${QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT} "ENABLE_OFFLOAD;ENABLE_CUDA" OFF)
 endif()
-# Some OpenMP offload runtime libraries have composibility issue with a vendor native runtime.
-# A workaround is making the vendor native runtime responsible for memory allocations and OpenMP associate/disassocate them.
-cmake_dependent_option(QMC_OFFLOAD_MEM_ASSOCIATED "Manage OpenMP memory allocations via the vendor runtime"
-                       ${QMC_OFFLOAD_MEM_ASSOCIATED_DEFAULT} "ENABLE_OFFLOAD;ENABLE_CUDA" OFF)
 
 #-------------------------------------------------------------------------------------
 # consider making this always on if OpenMP is no longer UB with Thread Support Library

diff --git a/external_codes/mpi_wrapper/mpi3/.gitlab-ci.yml b/external_codes/mpi_wrapper/mpi3/.gitlab-ci.yml
@@ -1,15 +1,15 @@
 # -*-indent-tabs-mode:nil;c-basic-offset:2;tab-width:4;-*-
 # Copyright 2020-2023 Alfredo A. Correa
 
-image: correaadock/gnudev:v2
+image: debian:testing
 
 variables:
   GIT_SUBMODULE_STRATEGY: recursive
 
 openmpi:
   stage: build
   script:
-    - apt update -qq && apt install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make g++ git libboost-serialization-dev
+    - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make g++ git libboost-serialization-dev
     - cd .. && ln -s boost-mpi3 mpi3 && cd mpi3
     - cd test
     - mkdir build && cd build
@@ -18,12 +18,43 @@ openmpi:
     - cmake --build . --parallel 2 || make VERBOSE=1
     - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest --output-on-failure
 
+exampi:
+  allow_failure: true
+  stage: build
+  script:
+    - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates cmake g++ git libboost-serialization-dev make python3 ssh strace  #  ca-certificates libopenmpi-dev openmpi-bin cmake make g++ git libboost-serialization-dev
+    - git clone https://correaa:${EXATOKEN}@github.com/tonyskjellum/ExaMPI.git
+    - cd ExaMPI
+    - mkdir build && cd build
+    - cmake .. --install-prefix=$HOME/exa
+    - make -j 4
+    - make install
+    - export PATH=$HOME/exa/bin:$PATH
+    - export PATH=$HOME/exa/runtime:$PATH
+    - export LD_LIBRARY_PATH=$HOME/exa/lib:$LD_LIBRARY_PATH
+    - export MPI_PATH=$HOME/exa/bin
+    - export MPI_LIB=$HOME/exa/lib
+    - export MPI_INC=$HOME/exa/include
+    - export MPI_HOME=$HOME/exa
+    - which mpicxx
+    - which mpirun
+    - strace mpirun -n 4 tests/integration_tests/allreduce
+    - strace mpirun -n 4 tests/integration_tests/alltoall
+    - ctest --output-on-failure
+    - cd ../..
+    - mkdir build && cd build
+    - which mpicxx
+    - mpicxx --version
+    - cmake .. -DCMAKE_BUILD_TYPE=Debug -DUSE_EXAMPI=1 -DMPI_HOME=$HOME/exa
+    - make -j 2 || make VERBOSE=1
+    - ls
+    - ctest --output-on-failure
+
 icpc-intelmpi:
   stage: build
   image: intel/oneapi-hpckit:latest
-  allow_failure: true
   script:
-    - apt-get update && apt-get install --no-install-recommends -y --quiet ca-certificates cmake curl g++ git make libboost-test-dev libboost-serialization-dev
+    - apt-get update && apt-get install --no-install-recommends -y --quiet bash ca-certificates cmake curl g++ git make libboost-test-dev libboost-serialization-dev
     - cd .. && ln -s boost-mpi3 mpi3 && cd mpi3
     - cd test
     - mkdir build && cd build
@@ -50,8 +81,9 @@ icpx-intelmpi:
 
 openmpi-clang:
   stage: build
+  image: debian:testing
   script:
-    - apt update -qq && apt install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make clang g++ git libstdc++-12-dev libboost-serialization-dev
+    - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make clang g++ git libstdc++-12-dev libboost-serialization-dev
     - cd .. && ln -s boost-mpi3 mpi3 && cd mpi3
     - cd test
     - mkdir build && cd build
@@ -63,10 +95,11 @@ openmpi-clang:
     - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest --output-on-failure
   needs: ["openmpi"]
 
-openmpi-clang20:
+"openmpi-clang20":
   stage: build
+  image: debian:stable
   script:
-    - apt update -qq && apt install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make clang git libstdc++-12-dev libboost-serialization-dev
+    - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin cmake make clang git libstdc++-12-dev libboost-serialization-dev
     - cd test
     - mkdir build && cd build
     - export MPI_OVERSUBSCRIBE="--oversubscribe"
@@ -80,7 +113,7 @@ openmpi-clang20:
 openmpi-clang-tidy:
   stage: build
   script:
-    - apt update -qq && apt install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin clang libstdc++-12-dev clang-tidy cmake git make libboost-serialization-dev
+    - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates libopenmpi-dev openmpi-bin clang libstdc++-12-dev clang-tidy cmake git make libboost-serialization-dev
     - mkdir build && cd build
     - export MPI_OVERSUBSCRIBE="--oversubscribe"
     - clang++ --version
@@ -93,7 +126,7 @@ openmpi-clang-tidy:
 openmpi-cppcheck:
   stage: build
   script:
-    - apt update -qq && apt install -qq -y --no-install-recommends libopenmpi-dev openmpi-bin g++ libstdc++-12-dev ca-certificates cmake cppcheck git make libboost-serialization-dev
+    - apt-get update -qq && apt-get install -qq -y --no-install-recommends libopenmpi-dev openmpi-bin g++ libstdc++-12-dev ca-certificates cmake cppcheck git make libboost-serialization-dev
     - mkdir build && cd build
     - export MPI_OVERSUBSCRIBE="--oversubscribe"
     - g++ --version
@@ -106,10 +139,12 @@ openmpi-cppcheck:
 mpich-debug:
   stage: build
   script:
-    - apt update -qq && apt install -qq -y --no-install-recommends libmpich-dev mpich
+    - apt-get update -qq && apt-get install -qq -y --no-install-recommends libopenmpi-dev openmpi-bin g++ libstdc++-12-dev ca-certificates cmake cppcheck git make libboost-serialization-dev
     - cd test
     - mkdir build && cd build
     - export MPI_OVERSUBSCRIBE=""
+    - export OMPI_ALLOW_RUN_AS_ROOT=1
+    - export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 
     - cmake .. -DCMAKE_BUILD_TYPE=Debug
     - cmake --build . --parallel 2 || cmake --build . --verbose
     - ctest --output-on-failure
@@ -118,7 +153,7 @@ mpich-valgrind:
   stage: build
   allow_failure: true
   script:
-    - apt update -qq && apt-get install -qq -y --no-install-recommends libmpich-dev mpich
+    - apt-get update -qq && apt-get install -qq -y --no-install-recommends ca-certificates cmake git libboost-test-dev libboost-serialization-dev libmpich-dev make mpich valgrind
     - mpirun --version
     - mkdir build && cd build
     - export MPI_OVERSUBSCRIBE=""
@@ -132,6 +167,7 @@ mpich-valgrind:
 
 qmcpack-openmpi:
   stage: test
+  image: debian:testing
   script:
     - apt-get -qq update && apt-get -qq install --no-install-recommends -y libblas-dev liblapack-dev libfftw3-dev libboost-serialization-dev libopenmpi-dev gfortran g++ cmake make git ca-certificates numdiff python3 python3-numpy python3-h5py python3-mpi4py python3-scipy libxml2-dev libhdf5-dev
     - git clone https://github.com/QMCPACK/qmcpack.git
@@ -146,7 +182,7 @@ qmcpack-openmpi:
   needs: ["openmpi"]
 
 qmcpack-cuda-runner:
-  allow_failure: true
+  allow_failure: false
   image: nvcr.io/nvidia/cuda:11.8.0-devel-ubuntu22.04
   tags:
     - nvidia-docker
@@ -161,25 +197,26 @@ qmcpack-cuda-runner:
     - git subtree add --squash -P external_codes/mpi3 $CI_REPOSITORY_URL $CI_COMMIT_BRANCH # e.g. https://gitlab.com/correaa/boost-multi.git
     - cd ../qmcpack
     - cd build
-    - CUDACXX=/usr/local/cuda/bin/nvcc cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_CXX_STANDARD=17 -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DCMAKE_CUDA_HOST_COMPILER=g++ -DCMAKE_CXX_FLAGS="-Wno-deprecated -Wno-deprecated-declarations" ..
+    - CUDACXX=/usr/local/cuda/bin/nvcc cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DBUILD_AFQMC=1 -DQMC_CXX_STANDARD=17 -DENABLE_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc -DQMC_GPU_ARCHS=sm_61 -DCMAKE_CUDA_HOST_COMPILER=g++ -DCMAKE_CXX_FLAGS="-Wno-deprecated -Wno-deprecated-declarations" ..
     - make -j4 afqmc test_afqmc_matrix test_afqmc_numerics test_afqmc_slaterdeterminantoperations test_afqmc_walkers test_afqmc_hamiltonians test_afqmc_hamiltonian_operations test_afqmc_phmsd test_afqmc_wfn_factory test_afqmc_prop_factory test_afqmc_estimators qmc-afqmc-performance
     - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest -R afqmc --output-on-failure
   needs: ["openmpi-cuda-11", "qmcpack-openmpi"]
 
 inq-openmpi:
   stage: test
+  image: debian:testing
   tags:
     - cpu
   script:
-    - apt-get update && apt-get install --no-install-recommends -y --quiet libblas-dev liblapack-dev libfftw3-dev libboost-filesystem-dev libboost-serialization-dev libopenmpi-dev libhdf5-dev gfortran g++ cmake pkg-config make git ca-certificates wget
+    - apt-get update && apt-get install --no-install-recommends -y --quiet libblas-dev liblapack-dev libfftw3-dev libboost-filesystem-dev libboost-serialization-dev libboost-iostreams-dev libopenmpi-dev libhdf5-dev gfortran g++ cmake pkg-config python3-dev make git ca-certificates wget
     - cmake --version
     - git clone https://gitlab.com/npneq/inq.git --recurse-submodules
     - cd inq
     - cd external_libs/mpi3
-    - git checkout $CI_COMMIT_BRANCH
+    - git checkout $CI_COMMIT_BRANCH  # check that multi repo is mirrored correctly from this repo to the submodule repo (npneq)
     - cd ../..
     - mkdir build && cd build
-    - ../configure --prefix=$HOME --disable-debug
+    - cmake .. --install-prefix=$HOME -DCMAKE_BUILD_TYPE=Release
     - make --jobs=2 || make VERBOSE=1
     - make install
     - OMPI_ALLOW_RUN_AS_ROOT=1 OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 ctest --output-on-failure
@@ -222,7 +259,7 @@ inq-cuda-11-openmpi-compileonly:
   tags:
     - nvidia-docker
   script:
-    - apt-get update && apt-get install --no-install-recommends -y --quiet cmake libblas-dev liblapack-dev libfftw3-dev libboost-filesystem-dev libboost-serialization-dev libopenmpi-dev libhdf5-dev gfortran g++ pkg-config make git ca-certificates wget
+    - apt-get update && apt-get install --no-install-recommends -y --quiet cmake git libblas-dev liblapack-dev libfftw3-dev libboost-filesystem-dev libboost-iostreams-dev libboost-serialization-dev libopenmpi-dev libhdf5-dev ca-certificates gfortran g++ make pkg-config python3-dev wget
     - cmake --version
     - git clone https://gitlab.com/npneq/inq.git --recurse-submodules
     - cd inq
@@ -231,7 +268,7 @@ inq-cuda-11-openmpi-compileonly:
     - cd ../..
     - mkdir build && cd build
     - /usr/local/cuda-11/bin/nvcc -V
-    - CUDA_ARCH_OVERRIDE=1 ../configure --prefix=$HOME --enable-cuda --with-cuda-prefix=/usr/local/cuda --pass-thru -DCMAKE_CUDA_COMPILER=/usr/local/cuda-11/bin/nvcc -DCMAKE_CUDA_ARCHITECTURES=61
+    - CUDACXX=/usr/local/cuda/bin/nvcc cmake .. --install-prefix=$HOME -DENABLE_CUDA=1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_CUDA_ARCHITECTURES=61
     - make silicon --jobs=2
     - make install
     - ctest -R silicon

diff --git a/external_codes/mpi_wrapper/mpi3/CMakeLists.txt b/external_codes/mpi_wrapper/mpi3/CMakeLists.txt
@@ -2,27 +2,37 @@
 cmake_minimum_required(VERSION 3.16)
 
 project(
-	mpi3
+	bmpi3
 	VERSION 0.79.0
 	DESCRIPTION "B-MPI3 is a C++ library wrapper for version 3.1 of the MPI standard interface that simplifies the utilization and maintenance of MPI code."
 	HOMEPAGE_URL "https://gitlab.com/correaa/boost-mpi3"
 	LANGUAGES CXX
 )
 
-include(GNUInstallDirs)
+find_package(MPI REQUIRED)  # might need to `module load mpi`
 
 add_library(${PROJECT_NAME} INTERFACE)
 
+target_compile_features(${PROJECT_NAME} INTERFACE cxx_std_17)
 target_include_directories(${PROJECT_NAME} INTERFACE $<BUILD_INTERFACE:${${PROJECT_NAME}_SOURCE_DIR}/include> $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}> $<BUILD_INTERFACE:${${PROJECT_NAME}_SOURCE_DIR}/include/mpi3/dummy>)
+target_link_libraries(${PROJECT_NAME} INTERFACE MPI::MPI_CXX)
 
-target_compile_features(${PROJECT_NAME} INTERFACE cxx_std_17)
-#set_target_properties(${PROJECT_NAME} PROPERTIES CXX_EXTENSIONS OFF)
+# to use this project directly from CMake
+# FetchContent_Declare(
+#     bmpi3
+#     GIT_REPOSITORY         git@gitlab.com:correaa/boost-mpi3.git  # https://gitlab.com/correaa/boost-mpi3.git
+#     GIT_TAG                master)
+# FetchContent_MakeAvailable(bmpi3)
+# add_executable(main main.cpp)
+# target_link_libraries(main PUBLIC bmpi3)
 
 # this makes CM FetchContent friendly https://www.foonathan.net/2022/06/cmake-fetchcontent/
 if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
-  	return()
+	return()
 endif()
 
+include(GNUInstallDirs)
+
 include(CTest)
 
 enable_testing()

diff --git a/external_codes/mpi_wrapper/mpi3/README.md b/external_codes/mpi_wrapper/mpi3/README.md
@@ -81,27 +81,32 @@ It turns out that this interface was a very minimal change over the C version, a
 The B.MPI3 library was designed to use simultaneously (interleaved) with the standard C interface of MPI.
 In this way, changes to existing code can be made incrementally.
 
-## Installation
+## Usage
 
-The library is "header-only"; no separate compilation is necessary.
-In order to compile it requires an MPI distribution (e.g. OpenMPI or MPICH2) and the corresponding compiler-wrapper (`mpic++` or `mpicxx`).
-This library requires C++14 and the Boost library installed.
+The library is "header-only"; no separate compilation or configuration of the library is necessary.
+It requires an MPI distribution (e.g. OpenMPI or MPICH2), a C++14 compiler and Boost libraries installed.
 A typical compilation/run command looks like this:
 
 ```bash
-$ mpic++ -std=c++14 -O3 mpi3/test/communicator_send.cpp -o communicator_send.x -lboost_serialization
+$ mpic++ communicator_send.cpp -o communicator_send.x -lboost_serialization
 $ mpirun -n 8 ./communicator_send.x
 ```
 
-In a system such as Red Hat, the dependencies can by installed by
+On a system such as Red Hat or Fedora, the dependencies can be installed with `sudo dnf install gcc-c++ boost-devel openmpi-devel mpich-devel`.
 
-```bash
-dnf install gcc-c++ boost-devel openmpi-devel mpich-devel
+Alternatively, the library can be fetched on demand by the CMake project:
+
+```cmake
+include(FetchContent)
+FetchContent_Declare(bmpi3 GIT_REPOSITORY https://gitlab.com/correaa/boost-mpi3.git)  # or git@gitlab.com:correaa/boost-mpi3.git
+FetchContent_MakeAvailable(bmpi3)
+
+target_link_libraries(your_executable PRIVATE bmpi3)
 ```
 
-Some systems require loading the MPI module before compiling and using MPI programs, `module load mpi/mpich`.
+Some systems require loading the MPI module before compiling and using MPI programs, `module load mpi` (or `mpich`).
 
-The library is tested frequently against `openmpi` and `mpich`, and less frequently with `mvapich2`.
+The library is tested frequently against `openmpi` and `mpich` implementations of MPI.
 
 ## Testing
 
@@ -119,7 +124,7 @@ ctest
 ## Initialization
 
 Like MPI, B.MPI3 requires some global library initialization.
-The library includes a convenience `mpi3/main.hpp` which wraps around this initialization steps and *simulates* a main function. 
+The library includes a convenience header `mpi3/main.hpp`, which provides a "main" function that does this initialization. 
 In this way, a parallel program looks very much like normal programs, except that the main function has a third argument with the default global communicator passed in.
 
 ```cpp
@@ -129,10 +134,9 @@ In this way, a parallel program looks very much like normal programs, except tha
 #include<iostream>
 
 namespace mpi3 = boost::mpi3; 
-using std::cout;
 
-int mpi3::main(int argc, char* argv[], mpi3::communicator world){
-	if(world.rank() == 0) cout << mpi3::version() << '\n';
+int mpi3::main(int argc, char** argv, mpi3::communicator world) {
+	if(world.rank() == 0) {std::cout << mpi3::version() << '\n';}
 	return 0;
 }
 ```