Skip to content

Commit ce25b0d

Browse files
authored
[Fix]Enable abacus to be compiled with CXX 17 & CUDA 13 (#6777)
* Update cuda_compat.h * Create cuda_compat.cpp * Update output_device.cpp * Update global.h * Update helper_cuda.h * Update output_device.cpp * Update global.h * Update cuda_compat.h * Update global.h * Update helper_cuda.h * Update helper_cuda.h * Update CMakeLists.txt * Update CMakeLists.txt * Update CMakeLists.txt * Update CMakeLists.txt * Update CMakeLists.txt * Update cuda_compat.cpp * Update cuda_compat.cpp * Update CMakeLists.txt * Update CMakeLists.txt
1 parent c677ec6 commit ce25b0d

File tree

7 files changed

+183
-141
lines changed

7 files changed

+183
-141
lines changed

CMakeLists.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,9 @@ set(ABACUS_BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${ABACUS_BIN_NAME})
174174
include_directories(${ABACUS_SOURCE_DIR})
175175
include_directories(${ABACUS_SOURCE_DIR}/source_base/module_container)
176176

177-
set(CMAKE_CXX_STANDARD 11)
177+
if(NOT DEFINED CMAKE_CXX_STANDARD)
178+
set(CMAKE_CXX_STANDARD 11)
179+
endif()
178180
set(CMAKE_CXX_STANDARD_REQUIRED ON)
179181

180182
add_executable(${ABACUS_BIN_NAME} source/source_main/main.cpp)
@@ -330,6 +332,10 @@ endif()
330332
if(USE_CUDA)
331333
cmake_minimum_required(VERSION 3.18) # required by `CUDA_ARCHITECTURES` below
332334
set_if_higher(CMAKE_CXX_STANDARD 14)
335+
if(CUDA_VERSION VERSION_GREATER_EQUAL "13.0")
  # Ask for at least C++17 when building against CUDA >= 13.0.
  # Only CMAKE_CXX_STANDARD is touched here; CMAKE_CUDA_STANDARD is copied
  # from it by the set(CMAKE_CUDA_STANDARD ...) call that follows this block,
  # so the status message must not claim the CUDA standard is set here.
  message(STATUS "CUDA ${CUDA_VERSION} detected. Requiring CMAKE_CXX_STANDARD >= 17 (CMAKE_CUDA_STANDARD follows it).")
  set_if_higher(CMAKE_CXX_STANDARD 17)
endif()
333339
set(CMAKE_CXX_EXTENSIONS ON)
334340
set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
335341
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

source/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ if(USE_CUDA)
8181
source_pw/module_pwdft/kernels/cuda/stress_op.cu
8282
source_pw/module_pwdft/kernels/cuda/wf_op.cu
8383
source_pw/module_pwdft/kernels/cuda/vnl_op.cu
84+
source_base/module_device/cuda_compat.cpp
8485
source_base/kernels/cuda/math_ylm_op.cu
8586
source_base/kernels/cuda/math_kernel_op.cu
8687
source_base/kernels/cuda/math_kernel_op_vec.cu
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
#include "cuda_compat.h"

#include <cstdio> // std::snprintf
2+
3+
namespace ModuleBase {
4+
namespace cuda_compat {
5+
6+
//---------------------------------------------------------------------------
7+
// Implementation of printDeprecatedDeviceInfo and printComputeModeInfo
8+
//---------------------------------------------------------------------------
9+
/**
 * @brief Writes the device properties that are only compiled in for CUDA < 13.0.
 *
 * Reports the GPU and memory clock rates, the memory bus width, the
 * copy/kernel overlap flag together with the copy-engine count, and the
 * kernel run-time-limit flag. When CUDA_VERSION is undefined or >= 13000
 * the body is compiled out and nothing is written.
 *
 * @param ofs_device Stream that receives the formatted lines.
 * @param deviceProp Device properties to report.
 */
void printDeprecatedDeviceInfo(std::ostream& ofs_device, const cudaDeviceProp& deviceProp)
{
#if defined(CUDA_VERSION) && CUDA_VERSION < 13000
    // Every write is bounded by sizeof(msg) so a long formatted value can
    // never overrun the scratch buffer.
    char msg[1024];

    std::snprintf(msg, sizeof(msg),
                  " GPU Max Clock rate: %.0f MHz (%0.2f "
                  "GHz)\n",
                  deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
    ofs_device << msg << std::endl;

    // This is supported in CUDA 5.0 (runtime API device properties)
    std::snprintf(msg, sizeof(msg), " Memory Clock rate: %.0f Mhz\n",
                  deviceProp.memoryClockRate * 1e-3f);
    ofs_device << msg << std::endl;

    std::snprintf(msg, sizeof(msg), " Memory Bus Width: %d-bit\n",
                  deviceProp.memoryBusWidth);
    ofs_device << msg << std::endl;

    std::snprintf(msg, sizeof(msg),
                  " Concurrent copy and kernel execution: %s with %d copy "
                  "engine(s)\n",
                  (deviceProp.deviceOverlap ? "Yes" : "No"),
                  deviceProp.asyncEngineCount);
    ofs_device << msg << std::endl;

    std::snprintf(msg, sizeof(msg), " Run time limit on kernels: %s\n",
                  deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
    ofs_device << msg << std::endl;
#else
    // Nothing is reported in this configuration; keep the parameters "used"
    // so the build stays free of unused-parameter warnings.
    (void)ofs_device;
    (void)deviceProp;
#endif
}
38+
39+
/**
 * @brief Writes multi-device cooperative-launch support and the compute mode.
 *
 * The compute mode is translated to text through a fixed table. Any value
 * outside the table is reported as "Unknown" instead of indexing past the
 * array (or streaming a null pointer). When CUDA_VERSION is undefined or
 * >= 13000 the body is compiled out and nothing is written.
 *
 * @param ofs_device Stream that receives the formatted lines.
 * @param deviceProp Device properties to report.
 */
void printComputeModeInfo(std::ostream& ofs_device, const cudaDeviceProp& deviceProp)
{
#if defined(CUDA_VERSION) && CUDA_VERSION < 13000
    char msg[1024];
    std::snprintf(msg, sizeof(msg), " Supports MultiDevice Co-op Kernel Launch: %s\n",
                  deviceProp.cooperativeMultiDeviceLaunch ? "Yes" : "No");
    ofs_device << msg << std::endl;

    static const char* const sComputeMode[] = {
        "Default (multiple host threads can use ::cudaSetDevice() with device "
        "simultaneously)",
        "Exclusive (only one host thread in one process is able to use "
        "::cudaSetDevice() with this device)",
        "Prohibited (no host thread can use ::cudaSetDevice() with this "
        "device)",
        "Exclusive Process (many threads in one process is able to use "
        "::cudaSetDevice() with this device)"};
    const int knownModes = static_cast<int>(sizeof(sComputeMode) / sizeof(sComputeMode[0]));

    // Guard the lookup: an unexpected computeMode must not read outside the table.
    const int mode = static_cast<int>(deviceProp.computeMode);
    const char* modeText = (mode >= 0 && mode < knownModes) ? sComputeMode[mode] : "Unknown";

    ofs_device << " Compute Mode:\n" << std::endl;
    ofs_device << " " << modeText << std::endl
               << std::endl;
#else
    // Nothing is reported in this configuration; keep the parameters "used"
    // so the build stays free of unused-parameter warnings.
    (void)ofs_device;
    (void)deviceProp;
#endif
}
64+
65+
//-------------------------------------------------------------------------------------------------
66+
// Implementation of cufftGetErrorStringCompat
67+
//-------------------------------------------------------------------------------------------------
68+
const char* cufftGetErrorStringCompat(cufftResult_t error)
{
// Expands to a case label that returns the enumerator's own spelling, so the
// label and the returned text cannot drift apart.
#define ABACUS_CUFFT_RESULT_NAME(code) \
    case code:                         \
        return #code

    switch (error)
    {
        ABACUS_CUFFT_RESULT_NAME(CUFFT_SUCCESS);
        ABACUS_CUFFT_RESULT_NAME(CUFFT_INVALID_PLAN);
        ABACUS_CUFFT_RESULT_NAME(CUFFT_ALLOC_FAILED);
        ABACUS_CUFFT_RESULT_NAME(CUFFT_INVALID_TYPE);
        ABACUS_CUFFT_RESULT_NAME(CUFFT_INVALID_VALUE);
        ABACUS_CUFFT_RESULT_NAME(CUFFT_INTERNAL_ERROR);
        ABACUS_CUFFT_RESULT_NAME(CUFFT_EXEC_FAILED);
        ABACUS_CUFFT_RESULT_NAME(CUFFT_SETUP_FAILED);
        ABACUS_CUFFT_RESULT_NAME(CUFFT_INVALID_SIZE);
        ABACUS_CUFFT_RESULT_NAME(CUFFT_UNALIGNED_DATA);
        ABACUS_CUFFT_RESULT_NAME(CUFFT_INVALID_DEVICE);
        ABACUS_CUFFT_RESULT_NAME(CUFFT_NO_WORKSPACE);
        ABACUS_CUFFT_RESULT_NAME(CUFFT_NOT_IMPLEMENTED);
        ABACUS_CUFFT_RESULT_NAME(CUFFT_NOT_SUPPORTED);

// These enumerators are only referenced when building against CUDA < 13.0.
#if defined(CUDA_VERSION) && CUDA_VERSION < 13000
        ABACUS_CUFFT_RESULT_NAME(CUFFT_INCOMPLETE_PARAMETER_LIST);
        ABACUS_CUFFT_RESULT_NAME(CUFFT_PARSE_ERROR);
        ABACUS_CUFFT_RESULT_NAME(CUFFT_LICENSE_ERROR);
#endif

        default:
            break;
    }

#undef ABACUS_CUFFT_RESULT_NAME

    // Any code without a case label above falls through to here.
    return "<unknown>";
}
114+
115+
} // namespace cuda_compat
116+
} // namespace ModuleBase

source/source_base/module_device/cuda_compat.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,12 @@
1212
#ifndef CUDA_COMPAT_H_
1313
#define CUDA_COMPAT_H_
1414

15+
#include <iostream> // For std::ostream
16+
#include <stdexcept> // For std::invalid_argument
1517
#include <cuda.h> // defines CUDA_VERSION
18+
#include <cuda_runtime.h>
19+
#include <cufft.h>
20+
1621

1722
// NVTX header for CUDA versions prior to 12.9 vs. 12.9+
1823
// This block ensures the correct NVTX header path is used based on CUDA_VERSION.
@@ -31,4 +36,46 @@
3136
#endif
3237
#endif
3338

39+
//-------------------------------------------------------------------------------------------------
40+
// Compatibility Layer Declarations
41+
//-------------------------------------------------------------------------------------------------
42+
namespace ModuleBase {
43+
namespace cuda_compat {
44+
45+
/**
46+
* @brief Prints device information that was deprecated or removed in CUDA 13.0 (writes nothing when built with CUDA >= 13.0).
47+
*
48+
* This function handles properties like clockRate, memoryClockRate, memoryBusWidth,
49+
* and concurrency flags, which are not available in newer CUDA toolkits.
50+
*
51+
* @param os The output stream (e.g., std::cout, std::ofstream).
52+
* @param prop The cudaDeviceProp structure containing device properties.
53+
*/
54+
void printDeprecatedDeviceInfo(std::ostream& os, const cudaDeviceProp& prop);
55+
56+
/**
57+
* @brief Prints multi-device cooperative-launch support and the device's compute mode using a legacy string mapping (writes nothing when built with CUDA >= 13.0).
58+
*
59+
* The compute mode display logic is encapsulated here as it relies on aspects
60+
* of the driver model that have changed.
61+
*
62+
* @param os The output stream (e.g., std::cout, std::ofstream).
63+
* @param prop The cudaDeviceProp structure containing device properties.
64+
*/
65+
void printComputeModeInfo(std::ostream& os, const cudaDeviceProp& prop);
66+
67+
/**
68+
* @brief Provides a cross-CUDA-version string conversion for cuFFT error codes.
69+
*
70+
* In CUDA 13.0, several error codes were removed. This function handles
71+
* these differences gracefully.
72+
*
73+
* @param error The cufftResult_t error code.
74+
* @return const char* A descriptive string for the error.
75+
*/
76+
const char* cufftGetErrorStringCompat(cufftResult_t error);
77+
78+
} // namespace cuda_compat
79+
} // namespace ModuleBase
80+
3481
#endif // CUDA_COMPAT_H_

source/source_base/module_device/output_device.cpp

Lines changed: 6 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#if defined(__CUDA)
1616
#include <cuda_runtime.h>
17+
#include "source_base/module_device/cuda_compat.h"
1718
#endif
1819

1920
#if defined(__ROCM)
@@ -218,27 +219,13 @@ void print_device_info<base_device::DEVICE_GPU>(
218219
sprintf(msg, " CUDA Capability Major/Minor version number: %d.%d\n",
219220
deviceProp.major, deviceProp.minor);
220221
ofs_device << msg << std::endl;
221-
sprintf(msg,
222-
" GPU Max Clock rate: %.0f MHz (%0.2f "
223-
"GHz)\n",
224-
deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
225-
ofs_device << msg << std::endl;
226-
// This is supported in CUDA 5.0 (runtime API device properties)
227-
sprintf(msg, " Memory Clock rate: %.0f Mhz\n",
228-
deviceProp.memoryClockRate * 1e-3f);
229-
ofs_device << msg << std::endl;
230-
231-
sprintf(msg, " Memory Bus Width: %d-bit\n",
232-
deviceProp.memoryBusWidth);
233-
ofs_device << msg << std::endl;
234222
sprintf(msg,
235223
" Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, "
236224
"%d), 3D=(%d, %d, %d)\n",
237225
deviceProp.maxTexture1D, deviceProp.maxTexture2D[0],
238226
deviceProp.maxTexture2D[1], deviceProp.maxTexture3D[0],
239227
deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]);
240228
ofs_device << msg << std::endl;
241-
242229
sprintf(
243230
msg,
244231
" Maximum Layered 1D Texture Size, (num) layers 1D=(%d), %d layers\n",
@@ -285,15 +272,6 @@ void print_device_info<base_device::DEVICE_GPU>(
285272
sprintf(msg, " Texture alignment: %zu bytes\n",
286273
deviceProp.textureAlignment);
287274
ofs_device << msg << std::endl;
288-
sprintf(msg,
289-
" Concurrent copy and kernel execution: %s with %d copy "
290-
"engine(s)\n",
291-
(deviceProp.deviceOverlap ? "Yes" : "No"),
292-
deviceProp.asyncEngineCount);
293-
ofs_device << msg << std::endl;
294-
sprintf(msg, " Run time limit on kernels: %s\n",
295-
deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
296-
ofs_device << msg << std::endl;
297275
sprintf(msg, " Integrated GPU sharing Host Memory: %s\n",
298276
deviceProp.integrated ? "Yes" : "No");
299277
ofs_device << msg << std::endl;
@@ -318,28 +296,14 @@ void print_device_info<base_device::DEVICE_GPU>(
318296
sprintf(msg, " Supports Cooperative Kernel Launch: %s\n",
319297
deviceProp.cooperativeLaunch ? "Yes" : "No");
320298
ofs_device << msg << std::endl;
321-
sprintf(msg, " Supports MultiDevice Co-op Kernel Launch: %s\n",
322-
deviceProp.cooperativeMultiDeviceLaunch ? "Yes" : "No");
323-
ofs_device << msg << std::endl;
324299
sprintf(msg,
325300
" Device PCI Domain ID / Bus ID / location ID: %d / %d / %d\n",
326301
deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
327302
ofs_device << msg << std::endl;
328-
const char *sComputeMode[] = {
329-
"Default (multiple host threads can use ::cudaSetDevice() with device "
330-
"simultaneously)",
331-
"Exclusive (only one host thread in one process is able to use "
332-
"::cudaSetDevice() with this device)",
333-
"Prohibited (no host thread can use ::cudaSetDevice() with this "
334-
"device)",
335-
"Exclusive Process (many threads in one process is able to use "
336-
"::cudaSetDevice() with this device)",
337-
"Unknown",
338-
NULL};
339-
sprintf(msg, " Compute Mode:\n");
340-
ofs_device << msg << std::endl;
341-
ofs_device << " " << sComputeMode[deviceProp.computeMode] << std::endl
342-
<< std::endl;
303+
304+
ModuleBase::cuda_compat::printDeprecatedDeviceInfo(ofs_device, deviceProp);
305+
306+
ModuleBase::cuda_compat::printComputeModeInfo(ofs_device, deviceProp);
343307

344308
// If there are 2 or more GPUs, query to determine whether RDMA is supported
345309
if (deviceCount >= 2) {
@@ -629,4 +593,4 @@ void record_device_memory<base_device::DEVICE_GPU>(
629593

630594
#endif
631595
}
632-
}
596+
}

source/source_hsolver/kernels/cuda/helper_cuda.h

Lines changed: 4 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040

4141
#include "helper_string.h"
4242

43+
#include "source_base/module_device/cuda_compat.h"
44+
4345
#ifndef EXIT_WAIVED
4446
#define EXIT_WAIVED 2
4547
#endif
@@ -107,60 +109,7 @@ static const char *_cudaGetErrorEnum(cublasStatus_t error) {
107109
#ifdef _CUFFT_H_
108110
// cuFFT API errors
109111
static const char *_cudaGetErrorEnum(cufftResult error) {
110-
switch (error) {
111-
case CUFFT_SUCCESS:
112-
return "CUFFT_SUCCESS";
113-
114-
case CUFFT_INVALID_PLAN:
115-
return "CUFFT_INVALID_PLAN";
116-
117-
case CUFFT_ALLOC_FAILED:
118-
return "CUFFT_ALLOC_FAILED";
119-
120-
case CUFFT_INVALID_TYPE:
121-
return "CUFFT_INVALID_TYPE";
122-
123-
case CUFFT_INVALID_VALUE:
124-
return "CUFFT_INVALID_VALUE";
125-
126-
case CUFFT_INTERNAL_ERROR:
127-
return "CUFFT_INTERNAL_ERROR";
128-
129-
case CUFFT_EXEC_FAILED:
130-
return "CUFFT_EXEC_FAILED";
131-
132-
case CUFFT_SETUP_FAILED:
133-
return "CUFFT_SETUP_FAILED";
134-
135-
case CUFFT_INVALID_SIZE:
136-
return "CUFFT_INVALID_SIZE";
137-
138-
case CUFFT_UNALIGNED_DATA:
139-
return "CUFFT_UNALIGNED_DATA";
140-
141-
case CUFFT_INCOMPLETE_PARAMETER_LIST:
142-
return "CUFFT_INCOMPLETE_PARAMETER_LIST";
143-
144-
case CUFFT_INVALID_DEVICE:
145-
return "CUFFT_INVALID_DEVICE";
146-
147-
case CUFFT_PARSE_ERROR:
148-
return "CUFFT_PARSE_ERROR";
149-
150-
case CUFFT_NO_WORKSPACE:
151-
return "CUFFT_NO_WORKSPACE";
152-
153-
case CUFFT_NOT_IMPLEMENTED:
154-
return "CUFFT_NOT_IMPLEMENTED";
155-
156-
case CUFFT_LICENSE_ERROR:
157-
return "CUFFT_LICENSE_ERROR";
158-
159-
case CUFFT_NOT_SUPPORTED:
160-
return "CUFFT_NOT_SUPPORTED";
161-
}
162-
163-
return "<unknown>";
112+
return ModuleBase::cuda_compat::cufftGetErrorStringCompat(error);
164113
}
165114
#endif
166115

@@ -965,4 +914,4 @@ inline bool checkCudaCapabilities(int major_version, int minor_version) {
965914

966915
// end of CUDA Helper Functions
967916

968-
#endif // COMMON_HELPER_CUDA_H_
917+
#endif // COMMON_HELPER_CUDA_H_

0 commit comments

Comments
 (0)