Skip to content

Commit

Permalink
Add WIP OpenCL
Browse files Browse the repository at this point in the history
Signed-off-by: Bensuperpc <bensuperpc@gmail.com>
  • Loading branch information
bensuperpc committed Dec 28, 2023
1 parent d3291b8 commit 3eae141
Show file tree
Hide file tree
Showing 27 changed files with 871 additions and 63 deletions.
17 changes: 9 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,20 @@ include(cmake/variables.cmake)

# ---- Enable Utile ----
include(cmake/utile/ccache.cmake)
# include(cmake/utile/lto.cmake)
# include(cmake/utile/ninja_color.cmake)

# For Qt
set(CMAKE_AUTOUIC ON)
set(CMAKE_AUTOMOC ON)
set(CMAKE_AUTORCC ON)

find_package(QT NAMES Qt6 COMPONENTS Quick QuickControls2 Gui Core Qml Charts Multimedia Widgets Concurrent SerialPort)
find_package(Qt${QT_VERSION_MAJOR} COMPONENTS Quick QuickControls2 Gui Core Qml Charts Multimedia Widgets Concurrent SerialPort)

if (Qt${QT_VERSION_MAJOR}_FOUND)
qt_standard_project_setup(REQUIRES 6.5)
if (Qt${QT_VERSION_MAJOR}_VERSION VERSION_GREATER_EQUAL 6.5)
qt_standard_project_setup()
else()
set(CMAKE_AUTOMOC ON)
set(CMAKE_AUTORCC ON)
set(CMAKE_AUTOUIC ON)
set(CMAKE_AUTOSRC ON)
endif()
else()
message(WARNING "Qt${QT_VERSION_MAJOR} not found, project will be CLI only")
endif()
Expand All @@ -42,7 +43,7 @@ find_package(OpenMP 2.0)

find_package(CUDAToolkit 11.0)

#find_package(OpenCL 2.0)
find_package(OpenCL 2.0)

# Download ThreadPool
include(cmake/lib/threadpool.cmake)
Expand Down
23 changes: 19 additions & 4 deletions source/gta_cheat_finder/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ find_package(OpenMP 2.0)

find_package(CUDAToolkit 11.0)

#find_package(OpenCL 2.0)
find_package(OpenCL 2.0)

# Download ThreadPool
include(../../cmake/lib/threadpool.cmake)
Expand All @@ -40,6 +40,10 @@ if (CUDAToolkit_FOUND)
add_subdirectory(cuda)
endif()

if (OpenCL_FOUND)
add_subdirectory(opencl)
endif()

set(SRCS
state/GTA_SA_cheat_finder_openmp.cpp
state/GTA_SA_cheat_finder_stdthread.cpp
Expand All @@ -59,6 +63,11 @@ if (CUDAToolkit_FOUND)
list(APPEND HEADERS state/GTA_SA_cheat_finder_cuda.hpp)
endif()

if (OpenCL_FOUND)
list(APPEND SRCS state/GTA_SA_cheat_finder_opencl.cpp)
list(APPEND HEADERS state/GTA_SA_cheat_finder_opencl.hpp)
endif()

add_library(GTA_SA_cheat_finder_lib ${SRCS} ${HEADERS})

target_compile_features(GTA_SA_cheat_finder_lib PUBLIC cxx_std_20)
Expand All @@ -75,9 +84,15 @@ if (OpenMP_FOUND OR OpenMP_CXX_FOUND)
endif()

if (CUDAToolkit_FOUND)
target_include_directories(GTA_SA_cheat_finder_lib PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
target_link_libraries(GTA_SA_cheat_finder_lib PUBLIC CUDA::cudart)
target_link_libraries(GTA_SA_cheat_finder_lib PUBLIC cuda_lib)
target_include_directories(GTA_SA_cheat_finder_lib PRIVATE ${CUDAToolkit_INCLUDE_DIRS})
target_link_libraries(GTA_SA_cheat_finder_lib PRIVATE CUDA::cudart)
target_link_libraries(GTA_SA_cheat_finder_lib PRIVATE cuda_lib)
endif()

if (OpenCL_FOUND)
target_include_directories(GTA_SA_cheat_finder_lib PRIVATE ${OpenCL_INCLUDE_DIRS})
target_link_libraries(GTA_SA_cheat_finder_lib PRIVATE ${OpenCL_LIBRARIES})
target_link_libraries(GTA_SA_cheat_finder_lib PRIVATE opencl_lib)
endif()

set_target_properties(GTA_SA_cheat_finder_lib
Expand Down
2 changes: 1 addition & 1 deletion source/gta_cheat_finder/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ set(HEADERS
add_library(cuda_lib ${SRCS} ${HEADERS})

#if (CUDAToolkit_FOUND)
target_include_directories(cuda_lib PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
#target_include_directories(cuda_lib PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_include_directories(cuda_lib PRIVATE ${CUDAToolkit_INCLUDE_DIRS})
target_link_libraries(cuda_lib PRIVATE CUDA::cudart)
#target_compile_features(cuda_lib PUBLIC cuda_std_17)
Expand Down
12 changes: 6 additions & 6 deletions source/gta_cheat_finder/cuda/jamcrc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ __device__ uint32_t jamcrc1Byte(const void* data, uint16_t length, const uint32_
uint32_t crc = ~previousCrc32;
uint8_t* current = (uint8_t*)data;
while (length--)
crc = (crc >> 8) ^ crc32_lookup[0][(crc & 0xFF) ^ *current++];
crc = (crc >> 8) ^ crc32LookupTable[0][(crc & 0xFF) ^ *current++];
return crc;
}

Expand All @@ -13,16 +13,16 @@ __device__ uint32_t jamcrc4Byte(const void* data, uint16_t length, const uint32_
uint32_t* current = (uint32_t*)data;
while (length >= 4) {
uint32_t one = *current++ ^ crc;
crc = crc32_lookup[0][(one >> 24) & 0xFF]
^ crc32_lookup[1][(one >> 16) & 0xFF]
^ crc32_lookup[2][(one >> 8) & 0xFF]
^ crc32_lookup[3][one & 0xFF];
crc = crc32LookupTable[0][(one >> 24) & 0xFF]
^ crc32LookupTable[1][(one >> 16) & 0xFF]
^ crc32LookupTable[2][(one >> 8) & 0xFF]
^ crc32LookupTable[3][one & 0xFF];
length -= 4;
}

uint8_t* currentChar = (uint8_t*)(current);
while (length-- != 0)
crc =(crc >> 8) ^ crc32_lookup[0][(crc & 0xFF) ^ *currentChar++];
crc =(crc >> 8) ^ crc32LookupTable[0][(crc & 0xFF) ^ *currentChar++];

return crc;
}
2 changes: 1 addition & 1 deletion source/gta_cheat_finder/cuda/jamcrc.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ typedef unsigned long long int uint64_cu;
__device__ uint32_t jamcrc1Byte(const void* data, uint16_t length, const uint32_t previousCrc32);
__device__ uint32_t jamcrc4Byte(const void* data, uint16_t length, const uint32_t previousCrc32);

__device__ const uint32_t crc32_lookup[4][256] = {
__device__ const uint32_t crc32LookupTable[4][256] = {
{0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E,
0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB,
0xF4D4B551, 0x83D385C7, 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8,
Expand Down
4 changes: 1 addition & 3 deletions source/gta_cheat_finder/cuda/kernel.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,6 @@ __device__ const uint32_t cheatList[87] = {
0xF53EF5A5, 0xF2AA0C1D, 0xF36345A8, 0x8990D5E1, 0xB7013B1B, 0xCAEC94EE, 0x31F0C3CC, 0xB3B3E72A, 0xC25CDBFF, 0xD5CF4EFF, 0x680416B1,
0xCF5FDA18, 0xF01286E9, 0xA841CC0A, 0x31EA09CF, 0xE958788A, 0x02C83A7C, 0xE49C3ED4, 0x171BA8CC, 0x86988DAE, 0x2BDD2FA1};

__device__ const uint32_t string_size_alphabet = 27;

__device__ const uint8_t alpha[string_size_alphabet] = {"ABCDEFGHIJKLMNOPQRSTUVWXYZ"};
__device__ const uint8_t alpha[27] = {"ABCDEFGHIJKLMNOPQRSTUVWXYZ"};

#endif
20 changes: 11 additions & 9 deletions source/gta_cheat_finder/cuda/wrapper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ __host__ uint32_t my::cuda::jamcrc(const void* data, const uint64_t length, cons
cudaStream_t stream;
cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);

// Calculate length of the array with max_range and min_range
uint64_t data_size = (length) * sizeof(char);
uint32_t* data_cuda = nullptr;

Expand All @@ -53,25 +52,23 @@ __host__ uint32_t my::cuda::jamcrc(const void* data, const uint64_t length, cons
memcpy(data_cuda, data, data_size);
*result_cuda = 0;

uint64_t grid_size = static_cast<uint64_t>(ceil(static_cast<double>(1) / cuda_block_size));

dim3 threads(static_cast<uint32_t>(cuda_block_size), 1, 1);
dim3 grid(static_cast<uint32_t>(grid_size), 1, 1);
dim3 threads(1, 1, 1);
dim3 grid(1, 1, 1);

jamcrcKernelWrapper<<<grid, threads, device, stream>>>(data_cuda, result_cuda, length, previousCrc32);

cudaStreamSynchronize(stream);
cudaDeviceSynchronize();

// std::cout << "result_cuda: " << *result_cuda << std::endl;

uint32_t result = *result_cuda;
cudaFree(data_cuda);
cudaFree(result_cuda);
cudaStreamDestroy(stream);

return *result_cuda;
return result;
}

__host__ void my::cuda::launch_kernel(std::vector<uint32_t>& jamcrc_results,
__host__ void my::cuda::launchKernel(std::vector<uint32_t>& jamcrc_results,
std::vector<uint64_t>& index_results,
const uint64_t min_range,
const uint64_t max_range,
Expand Down Expand Up @@ -121,6 +118,11 @@ __host__ void my::cuda::launch_kernel(std::vector<uint32_t>& jamcrc_results,
cudaMemPrefetchAsync(indexResultsPtr, indexResultsSize, device, stream);
cudaMemPrefetchAsync(ResultsSize, 1 * sizeof(uint32_t), device, stream);

if (jamcrcResultsPtr == nullptr || indexResultsPtr == nullptr || ResultsSize == nullptr) {
std::cout << "Error: Could not allocate memory on GPU" << std::endl;
return;
}

uint64_t rest = static_cast<uint64_t>((calcRange / cuda_block_size) + (calcRange % cuda_block_size));
uint32_t cubeRoot = static_cast<uint32_t>(std::ceil(std::cbrt(static_cast<long double>(rest))));

Expand Down
2 changes: 1 addition & 1 deletion source/gta_cheat_finder/cuda/wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
namespace my::cuda {
__host__ uint32_t jamcrc(const void* data, const uint64_t length, const uint32_t previousCrc32, const uint32_t cuda_block_size = 32);

__host__ void launch_kernel(std::vector<uint32_t>& jamcrc_results,
__host__ void launchKernel(std::vector<uint32_t>& jamcrc_results,
std::vector<uint64_t>& index_results,
const uint64_t min_range,
const uint64_t max_range,
Expand Down
57 changes: 57 additions & 0 deletions source/gta_cheat_finder/opencl/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
##############################################################
# ____ #
# | __ ) ___ _ __ ___ _ _ _ __ ___ _ __ _ __ ___ #
# | _ \ / _ \ '_ \/ __| | | | '_ \ / _ \ '__| '_ \ / __| #
# | |_) | __/ | | \__ \ |_| | |_) | __/ | | |_) | (__ #
# |____/ \___|_| |_|___/\__,_| .__/ \___|_| | .__/ \___| #
# |_| |_| #
##############################################################
# #
# BenLib, 2021 #
# Created: 16, March, 2021 #
# Modified: 17, March, 2021 #
# file: CMakeLists.txt #
# CMake #
# Source: #
# OS: ALL #
# CPU: ALL #
# #
##############################################################

cmake_minimum_required(VERSION 3.14)

# Build script for `opencl_lib`, the library that holds the OpenCL wrapper and
# kernel sources of the cheat finder. The parent directory only adds this
# subdirectory when OpenCL was found, so OpenCL is treated as a hard
# requirement here.
project(
  opencl_lib
  VERSION 0.1.0
  DESCRIPTION "Krack OpenCL lib"
  HOMEPAGE_URL "https://github.com/bensuperpc"
  LANGUAGES C CXX
)

# Provides the OpenCL::OpenCL imported target used below.
find_package(OpenCL 2.0 REQUIRED)

# Sources and headers are listed explicitly so that additions show up in diffs.
add_library(opencl_lib
  wrapper.cpp
  kernel.cpp
  wrapper.hpp
  kernel.hpp
)

# The imported target carries both the include directories and the library.
# NOTE(review): linked PRIVATE as in the original; if wrapper.hpp/kernel.hpp
# include OpenCL headers, consumers need this to be PUBLIC -- confirm.
target_link_libraries(opencl_lib PRIVATE OpenCL::OpenCL)

# Artifacts go to the top-level build tree. CMAKE_BINARY_DIR (not
# PROJECT_BINARY_DIR) is kept on purpose: this file calls project(), so
# PROJECT_BINARY_DIR would point at this subdirectory's binary dir instead.
set_target_properties(opencl_lib
  PROPERTIES
    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin"
    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
    ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib"
    PDB_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin"
)
Loading

0 comments on commit 3eae141

Please sign in to comment.