Skip to content

Commit b1d6e2c

Browse files
Junkai-Wu and jwu1980 authored
v4.3 update. (NVIDIA#2709)
* v4.3 update.
* Update the cute_dsl_api changelog's doc link
* Update version to 4.3.0
* Update the example link
* Update doc to encourage user to install DSL from requirements.txt

---------

Co-authored-by: Larry Wu <larwu@nvidia.com>
1 parent e6e2cc2 commit b1d6e2c

File tree

244 files changed

+59539
-10722
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

244 files changed

+59539
-10722
lines changed

CHANGELOG.md

Lines changed: 86 additions & 20 deletions
Large diffs are not rendered by default.

CMakeLists.txt

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,16 @@ endif()
7373

7474
include(${CMAKE_CURRENT_SOURCE_DIR}/CUDA.cmake)
7575

76+
# nvcc supports response files with --options-file but some tools like clangd
77+
# might choke on it. Thus provide a way to control the use of this feature.
78+
set(CUTLASS_CUDA_USE_RESPONSE_FILE ON CACHE BOOL "Enable CUDA response files for includes, libraries, and objects")
79+
80+
if(NOT CUTLASS_CUDA_USE_RESPONSE_FILE)
81+
set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_INCLUDES 0)
82+
set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_LIBRARIES 0)
83+
set(CMAKE_CUDA_USE_RESPONSE_FILE_FOR_OBJECTS 0)
84+
endif()
85+
7686
if (CUDA_VERSION VERSION_LESS 11.3)
7787
message(WARNING "CUTLASS ${CUTLASS_VERSION} requires CUDA 11.4 or higher, and strongly recommends CUDA 11.8 or higher.")
7888
elseif (CUDA_VERSION VERSION_LESS 11.4)
@@ -804,9 +814,9 @@ if(NOT WIN32)
804814
# Add common library search paths so executables and libraries can load and run
805815
# without LD_LIBRARY_PATH being set.
806816
link_libraries(
807-
"-Wl,-rpath,'$ORIGIN'"
808-
"-Wl,-rpath,'$ORIGIN/../lib64'"
809-
"-Wl,-rpath,'$ORIGIN/../lib'"
817+
"-Wl,-rpath,'$$ORIGIN'"
818+
"-Wl,-rpath,'$$ORIGIN/../lib64'"
819+
"-Wl,-rpath,'$$ORIGIN/../lib'"
810820
"-Wl,-rpath,'${CUDA_TOOLKIT_ROOT_DIR}/lib64'"
811821
"-Wl,-rpath,'${CUDA_TOOLKIT_ROOT_DIR}/lib'"
812822
${CMAKE_DL_LIBS}
@@ -934,7 +944,7 @@ function(cutlass_add_executable_tests NAME TARGET)
934944

935945
install(
936946
FILES ${__RESULT_CACHE_FILE}
937-
DESTINATION ${CUTLASS_TEST_INSTALL_BINDIR}/
947+
DESTINATION ${CUTLASS_TEST_INSTALL_BINDIR}
938948
)
939949

940950
endif()
@@ -1062,7 +1072,7 @@ function(cutlass_generate_profiler_tests NAME)
10621072

10631073
install(
10641074
FILES ${CUTLASS_PROFILER_REGRESSION_LIST_FILE}
1065-
DESTINATION ${CMAKE_INSTALL_INFODIR}/cutlass/
1075+
DESTINATION ${CMAKE_INSTALL_INFODIR}/cutlass
10661076
RENAME profiler_regressions.csv
10671077
)
10681078

README.md

Lines changed: 86 additions & 111 deletions
Large diffs are not rendered by default.

customConfigs.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ set(CUTLASS_PROFILER_REGRESSION_TEST_LEVEL ${CUTLASS_TEST_LEVEL} CACHE STRING "
3636

3737
find_package(Python3 3.5 COMPONENTS Interpreter REQUIRED)
3838

39+
3940
function(cutlass_generate_kernel_filter_and_testlist_files)
4041

4142
set(options)

examples/70_blackwell_gemm/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ set(TEST_SWIZZLE_2 --swizzle=2)
3333
set(TEST_SWIZZLE_5 --swizzle=5)
3434
set(TEST_SWIZZLE_5_UNEVEN --swizzle=5 --m=4096 --n=16384)
3535

36-
if(CUTLASS_NVCC_ARCHS STREQUAL "100a" OR CUTLASS_NVCC_ARCHS STREQUAL "100f" OR CUTLASS_NVCC_ARCHS STREQUAL "101a" OR CUTLASS_NVCC_ARCHS STREQUAL "101f" OR CUTLASS_NVCC_ARCHS STREQUAL "103a" OR CUTLASS_NVCC_ARCHS STREQUAL "103f")
36+
if(CUTLASS_NVCC_ARCHS MATCHES "100a|100f|101a|101f|103a|103f")
3737
cutlass_example_add_executable(
3838
70_blackwell_fp16_gemm
3939
70_blackwell_fp16_gemm.cu

examples/71_blackwell_gemm_with_collective_builder/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2828

2929
# Both filenames are shorter to avoid MAX_PATH issues on Windows.
30-
if(CUTLASS_NVCC_ARCHS STREQUAL "100a" OR CUTLASS_NVCC_ARCHS STREQUAL "100f" OR CUTLASS_NVCC_ARCHS STREQUAL "101a" OR CUTLASS_NVCC_ARCHS STREQUAL "101f" OR CUTLASS_NVCC_ARCHS STREQUAL "103a" OR CUTLASS_NVCC_ARCHS STREQUAL "103f")
30+
if(CUTLASS_NVCC_ARCHS MATCHES "100a|100f|101a|101f|103a|103f")
3131
cutlass_example_add_executable(
3232
71_blackwell_gemm_with_collective_builder
3333
71_blackwell_gemm_with_collective_builder.cu

examples/72_blackwell_narrow_precision_gemm/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2929

3030

31-
if(CUTLASS_NVCC_ARCHS STREQUAL "100a" OR CUTLASS_NVCC_ARCHS STREQUAL "100f" OR CUTLASS_NVCC_ARCHS STREQUAL "101a" OR CUTLASS_NVCC_ARCHS STREQUAL "101f" OR CUTLASS_NVCC_ARCHS STREQUAL "103a" OR CUTLASS_NVCC_ARCHS STREQUAL "103f")
31+
if(CUTLASS_NVCC_ARCHS MATCHES "100a|100f|101a|101f|103a|103f")
3232
cutlass_example_add_executable(
3333
72a_blackwell_nvfp4_bf16_gemm
3434
72a_blackwell_nvfp4_bf16_gemm.cu

examples/73_blackwell_gemm_preferred_cluster/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929

3030

31-
if(CUTLASS_NVCC_ARCHS STREQUAL "100a" OR CUTLASS_NVCC_ARCHS STREQUAL "100f" OR CUTLASS_NVCC_ARCHS STREQUAL "101a" OR CUTLASS_NVCC_ARCHS STREQUAL "101f" OR CUTLASS_NVCC_ARCHS STREQUAL "103a" OR CUTLASS_NVCC_ARCHS STREQUAL "103f")
31+
if(CUTLASS_NVCC_ARCHS MATCHES "100a|100f|101a|101f|103a|103f")
3232
cutlass_example_add_executable(
3333
73_blackwell_gemm_preferred_cluster
3434
blackwell_gemm_preferred_cluster.cu

examples/74_blackwell_gemm_streamk/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030

3131

32-
if(CUTLASS_NVCC_ARCHS STREQUAL "100a" OR CUTLASS_NVCC_ARCHS STREQUAL "100f" OR CUTLASS_NVCC_ARCHS STREQUAL "101a" OR CUTLASS_NVCC_ARCHS STREQUAL "101f" OR CUTLASS_NVCC_ARCHS STREQUAL "103a" OR CUTLASS_NVCC_ARCHS STREQUAL "103f")
32+
if(CUTLASS_NVCC_ARCHS MATCHES "100a|100f|101a|101f|103a|103f")
3333
cutlass_example_add_executable(
3434
74_blackwell_gemm_streamk
3535
blackwell_gemm_streamk.cu

examples/75_blackwell_grouped_gemm/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ set(TEST_SMALL_LARGE_GROUP --m=128 --n=128 --groups=50 --iterations=0)
4949
set(TEST_RANDOM_PERF --iterations=10) # Random problem sizes
5050
set(TEST_RANDOM_PERF_LARGE_GROUP --groups=50 --iterations=10) # Random problem sizes
5151

52-
if(CUTLASS_NVCC_ARCHS STREQUAL "100a")
52+
if("100a" IN_LIST CUTLASS_NVCC_ARCHS)
5353
cutlass_example_add_executable(
5454
75_blackwell_grouped_gemm
5555
75_blackwell_grouped_gemm.cu

0 commit comments

Comments
 (0)