-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.cu
More file actions
78 lines (69 loc) · 3.02 KB
/
utils.cu
File metadata and controls
78 lines (69 loc) · 3.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#include "utils.cuh"
#include <cstdio>
#include <iostream>
// File-scope configuration constants. None of them is referenced in the visible
// part of this file; the intended uses noted below are inferred from the names
// only and should be confirmed against the kernels / call sites.
// Presumably the 2D thread-block dimensions used for kernel launches — TODO confirm.
const unsigned BLOCK_SIZE_X = 16;
const unsigned BLOCK_SIZE_Y = 16;
const unsigned WARP_SIZE = 32; // assumed, can be changed to match hardware
// Double-precision scalar constants.
// NOTE(review): likely passed by address as alpha/beta scalars to cuBLAS/cuSPARSE
// routines (both handles are set to host pointer mode below) — confirm.
const double ONE = 1.0;
const double ZERO = 0.0;
const double NEGONE = -1.0;
// Presumably a numerical tolerance for convergence or comparisons — TODO confirm.
const double TOL = 1.e-12;
/** CUDA */
/**
 * Enumerates the CUDA devices visible to the runtime and prints the name and
 * compute capability of each one to stdout.
 *
 * Errors from the CUDA runtime are reported on stderr instead of being ignored.
 *
 * @return The number of CUDA devices reported by the runtime, or 0 when no
 *         device is available or the device count could not be queried.
 */
int checkDevice() {
    // Initialize to 0 so that a failed query can never leak an indeterminate value.
    int deviceCount = 0;
    const cudaError_t countErr = cudaGetDeviceCount(&deviceCount);
    if (countErr != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(countErr));
        deviceCount = 0;
    }

    // Check for a CUDA-capable device.
    if (deviceCount <= 0) {
        printf("NO CUDA DEVICE AVAILABLE\n");
        return deviceCount;
    }

    for (int device = 0; device < deviceCount; ++device) {
        cudaDeviceProp deviceProp;
        const cudaError_t propErr = cudaGetDeviceProperties(&deviceProp, device);
        if (propErr != cudaSuccess) {
            // Do not print fields of a struct the runtime failed to fill in.
            fprintf(stderr, "CUDA error: could not query device %d: %s\n", device, cudaGetErrorString(propErr));
            continue;
        }
        printf("Device %s has compute capability %d.%d.\n\n", deviceProp.name, deviceProp.major, deviceProp.minor);
        // Optional extra diagnostics, disabled by default. The memory-size fields are
        // size_t (hence %zu) and clockRate is reported in kHz.
        // printf("Number of multiprocessors: %d\n", deviceProp.multiProcessorCount);
        // printf("Clock rate: %d kHz\n", deviceProp.clockRate);
        // printf("Total amount of global memory: %zu KB\n", deviceProp.totalGlobalMem / 1024);
        // printf("Total amount of constant memory: %zu KB\n", deviceProp.totalConstMem / 1024);
        // printf("Total amount of shared memory per block: %zu KB\n", deviceProp.sharedMemPerBlock / 1024);
        // printf("Total amount of shared memory per SM: %d KB\n", 64);
        // printf("Warp size: %d\n", deviceProp.warpSize);
        // printf("Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock);
        // printf("Maximum number of blocks per multiprocessor: %d\n", deviceProp.maxThreadsPerMultiProcessor / deviceProp.maxThreadsPerBlock);
        // printf("Maximum number of threads per multiprocessor: %d\n", deviceProp.maxThreadsPerMultiProcessor);
        // printf("Maximum number of warps per multiprocessor: %d\n", deviceProp.maxThreadsPerMultiProcessor / 32);
        // printf("Maximum Grid size: (%d,%d,%d)\n", deviceProp.maxGridSize[0], deviceProp.maxGridSize[1], deviceProp.maxGridSize[2]);
        // printf("Maximum block dimension: (%d,%d,%d)\n\n", deviceProp.maxThreadsDim[0], deviceProp.maxThreadsDim[1], deviceProp.maxThreadsDim[2]);
    }
    return deviceCount;
}
/**
 * Fetches the most recent CUDA runtime error (cudaGetLastError also resets it)
 * and, if it is anything other than cudaSuccess, prints its description to
 * std::cout. The function only reports; it neither aborts nor throws.
 */
void cudaLastErrCheck()
{
    const cudaError_t lastError = cudaGetLastError();
    if (lastError == cudaSuccess) {
        return;  // nothing pending, nothing to report
    }
    std::cout << "CUDA error: " << cudaGetErrorString(lastError) << std::endl;
}
/** cuBLAS */
// Pointer mode applied to the cuBLAS handle in cublasStart().
#define MODE CUBLAS_POINTER_MODE_HOST
// File-scope status variables. Neither is read or written in the visible code.
// NOTE(review): possibly used by the error-check macros or by other translation
// units via utils.cuh — confirm before removing or renaming.
cublasStatus_t stat;
cudaError_t cudaStat;
// Global cuBLAS handle: created in cublasStart(), destroyed in cublasStop().
cublasHandle_t handle;
/**
 * Sets up the global cuBLAS context: creates `handle`, then applies the
 * pointer mode selected by MODE. Each returned status is passed to
 * cublasErrCheck.
 */
void cublasStart()
{
    // The handle has to exist before its pointer mode can be configured.
    cublasErrCheck(cublasCreate(&handle));
    cublasErrCheck(cublasSetPointerMode(handle, MODE));
}
/**
 * Destroys the global cuBLAS handle created by cublasStart().
 *
 * The status returned by cublasDestroy() is routed through cublasErrCheck
 * instead of being discarded, matching how cusparseStop() and cusolverStop()
 * treat their teardown calls.
 */
void cublasStop() {
    cublasErrCheck(cublasDestroy(handle));
}
/** cuSPARSE */
// Pointer mode applied to the cuSPARSE handle in cusparseStart().
#define SPMODE CUSPARSE_POINTER_MODE_HOST
// File-scope cuSPARSE status variable; not used in the visible code.
// NOTE(review): may be referenced elsewhere through utils.cuh — confirm.
cusparseStatus_t spStat;
// Global cuSPARSE handle: created in cusparseStart(), destroyed in cusparseStop().
cusparseHandle_t spHandle;
/**
 * Sets up the global cuSPARSE context: creates `spHandle`, then applies the
 * pointer mode selected by SPMODE. Each returned status is passed to
 * cusparseErrCheck.
 */
void cusparseStart()
{
    // The handle has to exist before its pointer mode can be configured.
    cusparseErrCheck(cusparseCreate(&spHandle));
    cusparseErrCheck(cusparseSetPointerMode(spHandle, SPMODE));
}
/**
 * Destroys the global cuSPARSE handle `spHandle`; the status returned by
 * cusparseDestroy() is passed to cusparseErrCheck.
 */
void cusparseStop()
{
    cusparseErrCheck(cusparseDestroy(spHandle));
}
/** cuSOLVER */
// File-scope cuSOLVER status variable; not used in the visible code.
// NOTE(review): may be referenced elsewhere through utils.cuh — confirm.
cusolverStatus_t solStat;
// Global cuSOLVER sparse-API handle: created in cusolverStart(), destroyed in cusolverStop().
cusolverSpHandle_t solHandle;
/**
 * Creates the global cuSOLVER sparse handle `solHandle`; the status returned
 * by cusolverSpCreate() is passed to cusolverErrCheck.
 */
void cusolverStart()
{
    cusolverErrCheck(cusolverSpCreate(&solHandle));
}
/**
 * Destroys the global cuSOLVER sparse handle `solHandle`; the status returned
 * by cusolverSpDestroy() is passed to cusolverErrCheck.
 */
void cusolverStop()
{
    cusolverErrCheck(cusolverSpDestroy(solHandle));
}