diff --git a/.github/scripts/fbgemm_gpu_integration.bash b/.github/scripts/fbgemm_gpu_integration.bash index 679c062d7a..23b9432a31 100644 --- a/.github/scripts/fbgemm_gpu_integration.bash +++ b/.github/scripts/fbgemm_gpu_integration.bash @@ -284,12 +284,14 @@ integration_fbgemm_gpu_install_matrix_run () { 12.8.1 12.9.1 13.0.2 + 13.2.0 ) elif [ "$variant_type" == "genai" ]; then local variant_versions=( 12.6.3 12.8.1 13.0.2 + 13.2.0 ) elif [ "$variant_type" == "rocm" ]; then local variant_versions=( diff --git a/.github/scripts/generate_ci_matrix.py b/.github/scripts/generate_ci_matrix.py index d91fab145a..3b615ac2b5 100644 --- a/.github/scripts/generate_ci_matrix.py +++ b/.github/scripts/generate_ci_matrix.py @@ -304,10 +304,10 @@ def cuda_versions(self) -> List[str]: # FBGEMM HSTU is expensive, so conserve CI resources return ["12.8.1"] elif self.target == TARGET_GENAI: - return ["12.6.3", "12.8.1", "12.9.1", "13.0.2"] + return ["12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0"] else: # GenAI is unable to support 11.8.0 anymore as of https://github.com/pytorch/FBGEMM/pull/4138 - return ["12.6.3", "12.8.1", "12.9.1", "13.0.2"] + return ["12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0"] def rocm_versions(self) -> List[str]: if GitRepo.ref() == REFS_MAIN and GitRepo.event_name() == EVENT_NAME_PUSH: diff --git a/.github/scripts/nova_dir.bash b/.github/scripts/nova_dir.bash index f4fcd2783a..73c499c381 100644 --- a/.github/scripts/nova_dir.bash +++ b/.github/scripts/nova_dir.bash @@ -22,7 +22,8 @@ fi ## Overwrite existing ENV VAR in Nova if [[ "$CONDA_ENV" != "" ]]; then export CONDA_RUN="conda run --no-capture-output -p ${CONDA_ENV}" && echo "$CONDA_RUN"; fi -if [[ "$CU_VERSION" == "cu130" ]] || +if [[ "$CU_VERSION" == "cu132" ]] || + [[ "$CU_VERSION" == "cu130" ]] || [[ "$CU_VERSION" == "cu129" ]] || [[ "$CU_VERSION" == "cu128" ]]; then export TORCH_CUDA_ARCH_LIST="8.0;9.0a;10.0a;12.0a" diff --git a/.github/scripts/utils_cuda.bash b/.github/scripts/utils_cuda.bash index 9cc6f5dc0b..9ec2cdcd9c 100644 --- a/.github/scripts/utils_cuda.bash +++ b/.github/scripts/utils_cuda.bash @@ -35,9 +35,21 @@ __set_cuda_symlinks_envvars () { echo "[INSTALL] Copying nvtx3 headers ..." # shellcheck disable=SC2086 - print_exec cp -r ${conda_prefix}/nsight-compute*/host/*/nvtx/include/nvtx3/* ${conda_prefix}/include/ - # shellcheck disable=SC2086 - print_exec cp -r ${conda_prefix}/nsight-compute*/host/*/nvtx/include/nvtx3/* ${new_cuda_home}/include/ + if compgen -G "${conda_prefix}/nsight-compute*/host/*/nvtx/include/nvtx3/*" > /dev/null 2>&1; then + # Copy nvtx3 headers from nsight-compute if available + # shellcheck disable=SC2086 + print_exec cp -r ${conda_prefix}/nsight-compute*/host/*/nvtx/include/nvtx3/* ${conda_prefix}/include/ + # shellcheck disable=SC2086 + print_exec cp -r ${conda_prefix}/nsight-compute*/host/*/nvtx/include/nvtx3/* ${new_cuda_home}/include/ + elif [ -d "${conda_prefix}/include/nvtx3" ]; then + # nvtx3 headers already available from cuda-nvtx package + echo "[INSTALL] nvtx3 headers already present in ${conda_prefix}/include/nvtx3 (from cuda-nvtx)" + if [ ! -d "${new_cuda_home}/include/nvtx3" ]; then + print_exec cp -r "${conda_prefix}/include/nvtx3" "${new_cuda_home}/include/" + fi + else + echo "[INSTALL] WARNING: nvtx3 headers not found in nsight-compute or cuda-nvtx" + fi fi echo "[INSTALL] Appending libcuda.so path to LD_LIBRARY_PATH ..." @@ -220,8 +232,17 @@ install_cuda () { cuda-nvrtc-dev \ cuda-cupti-dev \ cuda-profiler-api \ - cuda-opencl-dev \ - nsight-compute) || return 1 + cuda-opencl-dev) || return 1 + + # NOTE: nsight-compute is installed separately as best-effort because for + # newer CUDA versions (e.g. 13.2+), it may have unresolvable dependency + # conflicts on conda-forge (libxkbcommon -> libxml2-16 vs clangxx -> + # libllvm16 -> libxml2 <2.14). The nvtx3 headers it provides are handled + # in __set_cuda_symlinks_envvars with a fallback to cuda-nvtx. + # shellcheck disable=SC2086 + (exec_with_retries 3 conda install ${env_prefix} -c conda-forge --override-channels -y \ + "cuda-version=${cuda_version%.*}" \ + nsight-compute) || echo "[INSTALL] WARNING: nsight-compute could not be installed, skipping (nvtx3 headers will be sourced from cuda-nvtx)" fi # Set the symlinks and environment variables not covered by conda install diff --git a/.github/workflows/fbgemm_gpu_release_cuda.yml b/.github/workflows/fbgemm_gpu_release_cuda.yml index 2f7e807454..a064318b42 100644 --- a/.github/workflows/fbgemm_gpu_release_cuda.yml +++ b/.github/workflows/fbgemm_gpu_release_cuda.yml @@ -34,7 +34,7 @@ on: description: CUDA Version to Use for Building Artifact type: choice required: false - options: [ "12.6.3", "12.8.1", "12.9.1", "13.0.2" ] + options: [ "12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0" ] default: "13.0.2" publish-to-pypi: description: Publish Artifact to PyPI diff --git a/.github/workflows/fbgemm_gpu_release_genai.yml b/.github/workflows/fbgemm_gpu_release_genai.yml index a9f665ffe9..0f3c128bcb 100644 --- a/.github/workflows/fbgemm_gpu_release_genai.yml +++ b/.github/workflows/fbgemm_gpu_release_genai.yml @@ -34,7 +34,7 @@ on: description: CUDA Version to Use for Building Artifact type: choice required: false - options: [ "12.6.3", "12.8.1", "12.9.1", "13.0.2" ] + options: [ "12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0" ] default: "13.0.2" publish-to-pypi: description: Publish Artifact to PyPI @@ -72,7 +72,7 @@ jobs: { arch: x86, instance: "linux.12xlarge.memory" }, ] python-version: [ "3.10", "3.11", "3.12", "3.13", "3.14" ] - cuda-version: [ "12.6.3", "12.8.1", "13.0.2" ] + cuda-version: [ "12.6.3", "12.8.1", "13.0.2", "13.2.0" ] steps: - name: Setup Build Container @@ -146,7 +146,7 @@ jobs: { arch: x86, instance: "linux.g5.4xlarge.nvidia.gpu" }, ] python-version: [ "3.10", "3.11", "3.12", "3.13", "3.14" ] - cuda-version: [ "12.6.3", "12.8.1", "13.0.2" ] + cuda-version: [ "12.6.3", "12.8.1", "13.0.2", "13.2.0" ] needs: build_artifact steps: