Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CMakeLists.txt Improvements for CUDA #1337

Merged
merged 8 commits into from
Jan 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 38 additions & 48 deletions dace/codegen/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
cmake_minimum_required(VERSION 3.15)
cmake_minimum_required(VERSION 3.17)
project(dace_program)

# General options
Expand All @@ -9,6 +9,9 @@ set(DACE_FILES "" CACHE STRING "List of host code files relative to the root of
set(DACE_LIBS "" CACHE STRING "Extra libraries")
set(HLSLIB_PART_NAME "${DACE_XILINX_PART_NAME}")

# CUDA
set(DACE_CUDA_ARCHITECTURES_DEFAULT "" CACHE STRING "Default CUDA architectures in case native not found")

# FPGA specific
set(DACE_FPGA_AUTOBUILD_BITSTREAM OFF CACHE STRING "Automatically build bitstreams if they are not present.")

Expand Down Expand Up @@ -60,7 +63,7 @@ foreach(DACE_FILE ${DACE_FILES})
set(DACE_HIP_FILES ${DACE_HIP_FILES} ${DACE_FILE})
else()
set(DACE_ENABLE_CUDA ON)
set(DACE_CUDA_FILES ${DACE_CUDA_FILES} ${DACE_FILE})
set(DACE_CPP_FILES ${DACE_CPP_FILES} ${DACE_FILE})
endif()
elseif(${DACE_FILE_TARGET} STREQUAL "xilinx")
set(DACE_ENABLE_XILINX ON)
Expand Down Expand Up @@ -103,24 +106,42 @@ include_directories(${DACE_RUNTIME_DIR}/include)
# Global DaCe external dependencies
find_package(Threads REQUIRED)
find_package(OpenMP REQUIRED COMPONENTS CXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")

list(APPEND DACE_LIBS Threads::Threads)
list(APPEND DACE_LIBS OpenMP::OpenMP_CXX)

add_definitions(-DDACE_BINARY_DIR=\"${CMAKE_BINARY_DIR}\")
set(DACE_LIBS ${DACE_LIBS} ${CMAKE_THREAD_LIBS_INIT} ${OpenMP_CXX_LIBRARIES})

if(DACE_ENABLE_MPI)
find_package(MPI REQUIRED)
include_directories(${MPI_CXX_INCLUDE_PATH})
set(DACE_LIBS ${DACE_LIBS} ${MPI_CXX_LIBRARIES})
list(APPEND DACE_LIBS MPI::MPI_CXX)
endif()

if(DACE_ENABLE_CUDA)
find_package(CUDA REQUIRED)
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
include_directories(${CUDA_INCLUDE_DIRS})
if (MSVC_IDE)
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib/x64)
else()
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64)
set(CUDAToolkit_ROOT ${CUDA_TOOLKIT_ROOT_DIR})

find_package(CUDAToolkit REQUIRED)
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)

# CMake 3.24: set_property(TARGET tgt PROPERTY CUDA_ARCHITECTURES native)
if (NOT DEFINED LOCAL_CUDA_ARCHITECTURES)
execute_process(COMMAND "${CUDAToolkit_NVCC_EXECUTABLE}" "--run"
"${CMAKE_SOURCE_DIR}/tools/get_cuda_arch.cpp"
OUTPUT_VARIABLE _local_arch RESULT_VARIABLE _arch_res)

if(_arch_res EQUAL 0)
set(LOCAL_CUDA_ARCHITECTURES "${_local_arch}" CACHE STRING "Detected local GPUs for compilation")
message(STATUS "Local CUDA architectures detected: ${LOCAL_CUDA_ARCHITECTURES}")
else()
set(LOCAL_CUDA_ARCHITECTURES "${DACE_CUDA_ARCHITECTURES_DEFAULT}" CACHE STRING "Detected local GPUs for compilation")
message(STATUS "No local CUDA-capable GPUs found. Using default: ${DACE_CUDA_ARCHITECTURES_DEFAULT}")
endif()
endif()
set(DACE_LIBS ${DACE_LIBS} ${CUDA_LIBRARIES})

set(CMAKE_CUDA_ARCHITECTURES "${LOCAL_CUDA_ARCHITECTURES}")
enable_language(CUDA)
list(APPEND DACE_LIBS CUDA::cudart)
add_definitions(-DWITH_CUDA)

if (MSVC_IDE)
Expand Down Expand Up @@ -242,38 +263,6 @@ if (DACE_ENABLE_RTL AND DACE_ENABLE_XILINX)
include ("${DACE_RTLLIB_DIR}/cmake/rtl_target.cmake")
endif()

# Create CUDA object files
if(DACE_ENABLE_CUDA)
# Get local CUDA architectures
if (NOT DEFINED LOCAL_CUDA_ARCHITECTURES)
execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" "-ccbin" "${CMAKE_CXX_COMPILER}" "--run"
"${CMAKE_SOURCE_DIR}/tools/get_cuda_arch.cpp"
OUTPUT_VARIABLE _arch_out RESULT_VARIABLE _arch_res
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

if(_arch_res EQUAL 0)
string(REGEX REPLACE "\n" ";" _arch_out "${_arch_out}")
list(GET _arch_out -1 _local_arch)
string(REGEX REPLACE " " ";" _local_arch "${_local_arch}")
set(LOCAL_CUDA_ARCHITECTURES "${_local_arch}" CACHE STRING "Detected local GPUs for compilation")
message(STATUS "Local CUDA architectures detected: ${LOCAL_CUDA_ARCHITECTURES}")
else()
set(LOCAL_CUDA_ARCHITECTURES "" CACHE STRING "Detected local GPUs for compilation")
message(STATUS "No local CUDA-capable GPUs found")
endif()
endif()

# Add flags to compile for local CUDA architectures
foreach(var ${LOCAL_CUDA_ARCHITECTURES})
list(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_${var},code=sm_${var})
endforeach()

cuda_include_directories(${DACE_RUNTIME_DIR}/include)
cuda_compile(DACE_CUDA_OBJECTS ${DACE_CUDA_FILES})
set(DACE_OBJECTS ${DACE_OBJECTS} ${DACE_CUDA_OBJECTS})
endif() # DACE_ENABLE_CUDA


# Create HIP object files
if(DACE_ENABLE_HIP)
# Get local AMD architectures
Expand Down Expand Up @@ -580,7 +569,7 @@ include("targets/mlir/mlir.cmake")

# Create DaCe library file
add_library(${DACE_PROGRAM_NAME} SHARED ${DACE_CPP_FILES} ${DACE_OBJECTS})
target_link_libraries(${DACE_PROGRAM_NAME} ${DACE_LIBS})
target_link_libraries(${DACE_PROGRAM_NAME} PUBLIC ${DACE_LIBS})

# Add additional required files
if(DACE_ENABLE_INTELFPGA)
Expand All @@ -599,6 +588,7 @@ if(DACE_ENABLE_INTELFPGA)
DEPENDS ${DACE_PROGRAM_NAME}_hardware.aocx)
endif()
endif()

if(DACE_ENABLE_XILINX)
if(DACE_XILINX_MODE STREQUAL "software_emulation" AND DACE_FPGA_AUTOBUILD_BITSTREAM)
add_custom_target(autobuild_bitstream ALL
Expand All @@ -619,7 +609,7 @@ endif()

# Create DaCe loader stub
add_library(dacestub_${DACE_PROGRAM_NAME} SHARED "${CMAKE_SOURCE_DIR}/tools/dacestub.cpp")
target_link_libraries(dacestub_${DACE_PROGRAM_NAME} ${CMAKE_THREAD_LIBS_INIT} ${OpenMP_CXX_LIBRARIES})
target_link_libraries(dacestub_${DACE_PROGRAM_NAME} Threads::Threads OpenMP::OpenMP_CXX ${CMAKE_DL_LIBS})

# Windows-specific fixes
if (MSVC_IDE)
Expand Down
8 changes: 4 additions & 4 deletions dace/codegen/targets/cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,11 +484,11 @@
cuda_arch = Config.get('compiler', 'cuda', 'cuda_arch').split(',')
cuda_arch = [ca for ca in cuda_arch if ca is not None and len(ca) > 0]

flags = Config.get("compiler", "cuda", "args")
flags += ' ' + ' '.join('-gencode arch=compute_{arch},code=sm_{arch}'.format(arch=arch)
for arch in cuda_arch)
cuda_arch = ';'.join(cuda_arch)
options.append(f'-DDACE_CUDA_ARCHITECTURES_DEFAULT="{cuda_arch}"')

Check warning on line 488 in dace/codegen/targets/cuda.py

View check run for this annotation

Codecov / codecov/patch

dace/codegen/targets/cuda.py#L487-L488

Added lines #L487 - L488 were not covered by tests

options.append("-DCUDA_NVCC_FLAGS=\"{}\"".format(flags))
flags = Config.get("compiler", "cuda", "args")
options.append("-DCMAKE_CUDA_FLAGS=\"{}\"".format(flags))

Check warning on line 491 in dace/codegen/targets/cuda.py

View check run for this annotation

Codecov / codecov/patch

dace/codegen/targets/cuda.py#L490-L491

Added lines #L490 - L491 were not covered by tests

if backend == 'hip':
hip_arch = Config.get('compiler', 'cuda', 'hip_arch').split(',')
Expand Down
15 changes: 10 additions & 5 deletions dace/codegen/tools/get_cuda_arch.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
// Copyright 2019-2021 ETH Zurich and the DaCe authors. All rights reserved.
#include <cuda_runtime.h>

#include <algorithm>
#include <iostream>
#include <iterator>
#include <set>
#include <sstream>
#include <string>

int main(int argc, char **argv) {
int main() {
int count;
if (cudaGetDeviceCount(&count) != cudaSuccess) return 1;

Expand All @@ -22,10 +24,13 @@ int main(int argc, char **argv) {
architectures.insert(ss.str());
}

// Print out architectures
for (std::set<std::string>::iterator iter = architectures.begin();
iter != architectures.end(); ++iter)
std::cout << *iter << " ";
if (architectures.empty()) {
return 1;
}

std::copy(architectures.begin(), std::prev(architectures.end(), 1),
std::ostream_iterator<std::string>(std::cout, ";"));
std::cout << *architectures.rbegin();

return 0;
}
4 changes: 2 additions & 2 deletions dace/config_schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,8 @@ required:
type: str
title: nvcc Arguments
description: Compiler argument flags for CUDA
default: '-std=c++14 -Xcompiler -fPIC -O3 -Xcompiler -march=native --use_fast_math -Xcompiler -Wno-unused-parameter'
default_Windows: '-std=c++14 -O3 --use_fast_math'
default: '-Xcompiler -march=native --use_fast_math -Xcompiler -Wno-unused-parameter'
default_Windows: '-O3 --use_fast_math'

hip_args:
type: str
Expand Down
Loading