Skip to content

Commit 5ec3027

Browse files
merged main in
1 parent 03bafdc commit 5ec3027

File tree

1 file changed

+7
-6
lines changed

1 file changed

+7
-6
lines changed

cpp/src/neighbors/detail/smem_utils.cuh

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -14,10 +14,11 @@ namespace cuvs::neighbors::detail {
1414
/**
1515
* @brief (Thread-)Safely invoke a kernel with a maximum dynamic shared memory size.
1616
*
17-
* Maintains a monotonically growing high-water mark for `cudaFuncAttributeMaxDynamicSharedMemorySize`.
18-
* When the kernel function pointer changes, the new kernel is brought up to the current high-water
19-
* mark; when smem_size exceeds the high-water mark, it is grown for the current kernel.
20-
* This guarantees every kernel's attribute is always >= smem_size at the time of launch.
17+
* Maintains a monotonically growing high-water mark for
18+
* `cudaFuncAttributeMaxDynamicSharedMemorySize`. When the kernel function pointer changes, the new
19+
* kernel is brought up to the current high-water mark; when smem_size exceeds the high-water mark,
20+
* it is grown for the current kernel. This guarantees every kernel's attribute is always >=
21+
* smem_size at the time of launch.
2122
*
2223
* NB: cudaFuncSetAttribute is per kernel function pointer value, not per type. Multiple kernel
2324
* template instantiations may share the same KernelT type (e.g. function pointers with the same
@@ -50,7 +51,7 @@ void safely_launch_kernel_with_smem_size(KernelT const& kernel,
5051
// mark. This is necessary because cudaFuncSetAttribute applies to a specific function pointer,
5152
// not to the pointer type — different template instantiations may share the same KernelT.
5253
if (kernel != last_kernel) {
53-
current_kernel = kernel;
54+
current_kernel = kernel;
5455
auto launch_status =
5556
cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, last_smem_size);
5657
RAFT_EXPECTS(launch_status == cudaSuccess,
@@ -60,7 +61,7 @@ void safely_launch_kernel_with_smem_size(KernelT const& kernel,
6061
// When smem_size exceeds the high-water mark, grow it for the current kernel.
6162
// If the kernel also changed above, this handles the case where smem_size > last_smem_size.
6263
if (smem_size > last_smem_size) {
63-
current_smem_size = smem_size;
64+
current_smem_size = smem_size;
6465
auto launch_status =
6566
cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size);
6667
RAFT_EXPECTS(launch_status == cudaSuccess,

0 commit comments

Comments
 (0)