@@ -14,10 +14,11 @@ namespace cuvs::neighbors::detail {
1414/* *
1515 * @brief (Thread-)Safely invoke a kernel with a maximum dynamic shared memory size.
1616 *
17- * Maintains a monotonically growing high-water mark for `cudaFuncAttributeMaxDynamicSharedMemorySize`.
18- * When the kernel function pointer changes, the new kernel is brought up to the current high-water
19- * mark; when smem_size exceeds the high-water mark, it is grown for the current kernel.
20- * This guarantees every kernel's attribute is always >= smem_size at the time of launch.
17+ * Maintains a monotonically growing high-water mark for
18+ * `cudaFuncAttributeMaxDynamicSharedMemorySize`. When the kernel function pointer changes, the new
19+ * kernel is brought up to the current high-water mark; when smem_size exceeds the high-water mark,
20+ * it is grown for the current kernel. This guarantees every kernel's attribute is always >=
21+ * smem_size at the time of launch.
2122 *
2223 * NB: cudaFuncSetAttribute is per kernel function pointer value, not per type. Multiple kernel
2324 * template instantiations may share the same KernelT type (e.g. function pointers with the same
@@ -50,7 +51,7 @@ void safely_launch_kernel_with_smem_size(KernelT const& kernel,
5051 // mark. This is necessary because cudaFuncSetAttribute applies to a specific function pointer,
5152 // not to the pointer type — different template instantiations may share the same KernelT.
5253 if (kernel != last_kernel) {
53- current_kernel = kernel;
54+ current_kernel = kernel;
5455 auto launch_status =
5556 cudaFuncSetAttribute (kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, last_smem_size);
5657 RAFT_EXPECTS (launch_status == cudaSuccess,
@@ -60,7 +61,7 @@ void safely_launch_kernel_with_smem_size(KernelT const& kernel,
6061 // When smem_size exceeds the high-water mark, grow it for the current kernel.
6162 // If the kernel also changed above, this handles the case where smem_size > last_smem_size.
6263 if (smem_size > last_smem_size) {
63- current_smem_size = smem_size;
64+ current_smem_size = smem_size;
6465 auto launch_status =
6566 cudaFuncSetAttribute (kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size);
6667 RAFT_EXPECTS (launch_status == cudaSuccess,
0 commit comments