Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/config.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
# Possible values:
# - ON: enable CUDA with cmake's auto search
# - OFF: disable CUDA
# - /path/to/cuda: use specific path to cuda toolkit
# - /path/to/cuda: use specific path to CUDA toolkit
set(USE_CUDA OFF)

# Whether to enable NCCL support:
Expand Down
2 changes: 1 addition & 1 deletion cmake/modules/CUDA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
find_cuda(${USE_CUDA} ${USE_CUDNN})

if(CUDA_FOUND)
# always set the includedir when cuda is available
# always set the includedir when CUDA is available
# avoid global retrigger of cmake
include_directories(SYSTEM ${CUDA_INCLUDE_DIRS})
endif(CUDA_FOUND)
Expand Down
2 changes: 1 addition & 1 deletion cmake/utils/FindCUDA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
# find_cuda(${USE_CUDA} ${USE_CUDNN})
#
# - When USE_CUDA=ON, use auto search
# - When USE_CUDA=/path/to/cuda-path, use the cuda path
# - When USE_CUDA=/path/to/cuda-path, use the CUDA path
# - When USE_CUDNN=ON, use auto search
# - When USE_CUDNN=/path/to/cudnn-path, use the cudnn path
#
Expand Down
2 changes: 1 addition & 1 deletion docs/install/docker.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Docker Images
We provide docker utility scripts to help developers to setup development environment.
They are also helpful for running through the TVM demo and tutorials.
We need `docker <https://docs.docker.com/engine/installation/>`_ and
`nvidia-docker <https://github.com/NVIDIA/nvidia-docker/>`_ if we want to use cuda.
`nvidia-docker <https://github.com/NVIDIA/nvidia-docker/>`_ if we want to use CUDA.

Get a tvm source distribution or clone the GitHub repo to get the auxiliary scripts

Expand Down
2 changes: 1 addition & 1 deletion include/tvm/s_tir/meta_schedule/postproc.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ class Postproc : public runtime::ObjectRef {
TVM_DLL static Postproc RewriteReductionBlock();
/*!
* \brief Create a postprocessor that adds thread binding to unbound blocks
* \param max_threadblocks The max number of threadblocks in the cuda device.
* \param max_threadblocks The max number of threadblocks in the CUDA device.
* \return The postprocessor created.
*/
TVM_DLL static Postproc RewriteUnboundBlock(int max_threadblocks);
Expand Down
2 changes: 1 addition & 1 deletion include/tvm/tir/function.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ namespace attr {
* The size of the shared memory that may be allocated internally by
* the kernel. For example, exposed as the
* CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES attribute in
* cuda.
* CUDA.
*
* Defined as "tir.use_dyn_shared_memory".
*
Expand Down
2 changes: 1 addition & 1 deletion python/tvm/contrib/cutlass/gemm_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def __init__(self):
{ \\
cudaError_t error = status; \\
if (error != cudaSuccess) { \\
std::cerr << "Got bad cuda status: " << cudaGetErrorString(error) \\
std::cerr << "Got bad CUDA status: " << cudaGetErrorString(error) \\
<< " at line: " << __LINE__ << std::endl; \\
exit(EXIT_FAILURE); \\
} \\
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def get_trt_common_h_code() -> str:
do { \\
auto ret = (status); \\
if (ret != 0) { \\
std::cout << "Cuda failure: " << ret << std::endl; \\
std::cout << "CUDA failure: " << ret << std::endl; \\
abort(); \\
} \\
} while (0)
Expand Down
22 changes: 11 additions & 11 deletions python/tvm/contrib/nvcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,18 @@
def compile_cuda(
code, target_format=None, arch=None, options=None, path_target=None, compiler="nvcc"
):
"""Compile cuda code with NVCC or NVRTC.
"""Compile CUDA code with NVCC or NVRTC.

Parameters
----------
code : str
The cuda code.
The CUDA code.

target_format : str
The target format of the compiler ("ptx", "cubin", or "fatbin").

arch : str
The cuda architecture.
The CUDA architecture.

options : str or list of str
The additional options.
Expand Down Expand Up @@ -78,7 +78,7 @@ def compile_cuda(
elif compiler == "nvrtc":
result = _compile_cuda_nvrtc(code, target_format, arch, options, path_target, use_nvshmem)
else:
raise ValueError(f"cuda compiler must be 'nvcc' or 'nvrtc', got: {compiler}")
raise ValueError(f"CUDA compiler must be 'nvcc' or 'nvrtc', got: {compiler}")

return result

Expand Down Expand Up @@ -623,12 +623,12 @@ def _link_nvshmem_nvrtc(binary_buf, nvshmem_lib_path):


def find_cuda_path():
"""Utility function to find cuda path
"""Utility function to find CUDA path

Returns
-------
path : str
Path to cuda root.
Path to CUDA root.
"""
if "CUDA_PATH" in os.environ:
return os.environ["CUDA_PATH"]
Expand All @@ -641,23 +641,23 @@ def find_cuda_path():
cuda_path = "/usr/local/cuda"
if os.path.exists(os.path.join(cuda_path, "bin/nvcc")):
return cuda_path
raise RuntimeError("Cannot find cuda path")
raise RuntimeError("Cannot find CUDA path")


def get_cuda_version(cuda_path=None):
"""Utility function to get cuda version
"""Utility function to get CUDA version

Parameters
----------
cuda_path : Optional[str]

Path to cuda root. If None is passed, will use
Path to CUDA root. If None is passed, will use
`find_cuda_path()` as default.

Returns
-------
version : float
The cuda version
The CUDA version

"""
if cuda_path is None:
Expand All @@ -683,7 +683,7 @@ def get_cuda_version(cuda_path=None):
release_fields = [s.strip() for s in release_line.split(",")]
version_str = [f[1:] for f in release_fields if f.startswith("V")][0]
return tuple(int(field) for field in version_str.split("."))
raise RuntimeError("Cannot read cuda version file")
raise RuntimeError("Cannot read CUDA version file")


def find_nvshmem_paths() -> Tuple[str, str]:
Expand Down
4 changes: 2 additions & 2 deletions python/tvm/contrib/xcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,12 @@ def create_dylib(output, objects, arch, sdk="macosx", min_os_version=None):


def compile_metal(code, path_target=None, sdk="macosx", min_os_version=None):
"""Compile metal with CLI tool from env.
"""Compile Metal with CLI tool from env.

Parameters
----------
code : str
The cuda code.
The Metal code.

path_target : str, optional
Output file.
Expand Down
26 changes: 13 additions & 13 deletions python/tvm/runtime/device.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def exist(self):

Returns True if TVM has support for the device, if the
physical device is present, and the device is accessible
through appropriate drivers (e.g. cuda/vulkan).
through appropriate drivers (e.g. CUDA/Vulkan).

Returns
-------
Expand All @@ -54,7 +54,7 @@ def exist(self):
def max_threads_per_block(self):
"""Maximum number of threads on each block.

Returns device value for cuda, metal, rocm, opencl, and vulkan
Returns device value for CUDA, Metal, ROCm, OpenCL, and Vulkan
devices. Returns remote device value for RPC devices.
Returns None for all other devices.

Expand All @@ -70,8 +70,8 @@ def max_threads_per_block(self):
def warp_size(self):
"""Number of threads that execute concurrently.

Returns device value for cuda, rocm, and vulkan. Returns
1 for metal and opencl devices, regardless of the physical
Returns device value for CUDA, ROCm, and Vulkan. Returns
1 for Metal and OpenCL devices, regardless of the physical
device. Returns remote device value for RPC devices. Returns
None for all other devices.

Expand All @@ -87,7 +87,7 @@ def warp_size(self):
def max_shared_memory_per_block(self):
"""Total amount of shared memory per block in bytes.

Returns device value for cuda, rocm, opencl, and vulkan.
Returns device value for CUDA, ROCm, OpenCL, and Vulkan.
Returns remote device value for RPC devices. Returns None for
all other devices.

Expand All @@ -106,8 +106,8 @@ def compute_version(self):
Returns maximum API version (e.g. CUDA/OpenCL/Vulkan)
supported by the device.

Returns device value for cuda, rocm, opencl, and
vulkan. Returns remote device value for RPC devices. Returns
Returns device value for CUDA, ROCm, OpenCL, and
Vulkan. Returns remote device value for RPC devices. Returns
None for all other devices.

Returns
Expand All @@ -122,7 +122,7 @@ def compute_version(self):
def device_name(self):
"""Return the vendor-specific name of device.

Returns device value for cuda, rocm, opencl, and vulkan.
Returns device value for CUDA, ROCm, OpenCL, and Vulkan.
Returns remote device value for RPC devices. Returns None for
all other devices.

Expand All @@ -138,7 +138,7 @@ def device_name(self):
def max_clock_rate(self):
"""Return the max clock frequency of device (kHz).

Returns device value for cuda, rocm, and opencl. Returns
Returns device value for CUDA, ROCm, and OpenCL. Returns
remote device value for RPC devices. Returns None for all
other devices.

Expand All @@ -154,7 +154,7 @@ def max_clock_rate(self):
def multi_processor_count(self):
"""Return the number of compute units in the device.

Returns device value for cuda, rocm, and opencl. Returns
Returns device value for CUDA, ROCm, and OpenCL. Returns
remote device value for RPC devices. Returns None for all
other devices.

Expand All @@ -170,7 +170,7 @@ def multi_processor_count(self):
def max_thread_dimensions(self):
"""Return the maximum size of each thread axis

Returns device value for cuda, rocm, opencl, and vulkan.
Returns device value for CUDA, ROCm, OpenCL, and Vulkan.
Returns remote device value for RPC devices. Returns None for
all other devices.

Expand All @@ -186,10 +186,10 @@ def max_thread_dimensions(self):
def api_version(self):
"""Returns version number of the SDK used to compile TVM.

For example, CUDA_VERSION for cuda or VK_HEADER_VERSION for
For example, CUDA_VERSION for CUDA or VK_HEADER_VERSION for
Vulkan.

Returns device value for cuda, rocm, opencl, and vulkan.
Returns device value for CUDA, ROCm, OpenCL, and Vulkan.
Returns remote device value for RPC devices. Returns None for
all other devices.

Expand Down
2 changes: 1 addition & 1 deletion python/tvm/runtime/executable.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def jit(
"""Just-in-time compile and link the modules.
The Executable returned by tvm.compile may not be directly
runnable as they may contain cuda source files and objects that
runnable as they may contain CUDA source files and objects that
are yet to be compiled and linked.
This function helps to create a runtime.Module for these cases.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""layout_transform scheduling rule for cuda."""
"""layout_transform scheduling rule for CUDA."""

import math
from collections import deque
Expand Down
2 changes: 1 addition & 1 deletion python/tvm/testing/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
MARKERS = {
"gpu": "mark a test as requiring a gpu",
"tensorcore": "mark a test as requiring a tensorcore",
"cuda": "mark a test as requiring cuda",
"cuda": "mark a test as requiring CUDA",
"opencl": "mark a test as requiring opencl",
"rocm": "mark a test as requiring rocm",
"vulkan": "mark a test as requiring vulkan",
Expand Down
4 changes: 2 additions & 2 deletions python/tvm/testing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1197,7 +1197,7 @@ def requires_nvcc_version(major_version, minor_version=0, release_version=0):
installed version of NVCC is at least `(major_version,
minor_version, release_version)`.
This also marks the test as requiring a cuda support.
This also marks the test as requiring CUDA support.
Parameters
----------
Expand Down Expand Up @@ -1240,7 +1240,7 @@ def requires_cuda_compute_version(major_version, minor_version=0):
compute architecture of the GPU is at least `(major_version,
minor_version)`.
This also marks the test as requiring a cuda support.
This also marks the test as requiring CUDA support.
Parameters
----------
Expand Down
2 changes: 1 addition & 1 deletion python/tvm/topi/gpu/sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,7 @@ def dual_mergepath(

with T.serial(0, cast(upper_lim - lower_lim, target_dtype)) as l2_width:
width = 2 << (l2_width + lower_lim)
# Define and launch the cuda kernel
# Define and launch the CUDA kernel
target = tvm.target.Target.current()
if "vulkan" in str(target):
ntx = max_threads
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/contrib/cublas/cublas_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ inline cudaDataType_t GetCudaDataType(DLDataType type) {
return CUDA_R_16BF;
}
}
LOG(FATAL) << "Unsupported cuda type";
LOG(FATAL) << "Unsupported CUDA type";
}

/*! \brief Execute matrix multiply followed by the specified epilogue, using cuBLASLt. */
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/contrib/nvshmem/memory_allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ class NVSHMEMAllocator final : public PooledAllocator {
void* DeviceAllocDataSpace(Device dev, size_t size, size_t alignment,
DLDataType type_hint) final {
ICHECK_EQ(dev.device_type, DLDeviceType::kDLCUDA)
<< "nvshmem can only allocate cuda device memory space.";
<< "nvshmem can only allocate CUDA device memory space.";
ICHECK(type_hint.code == DLDataTypeCode::kDLInt || type_hint.code == DLDataTypeCode::kDLUInt ||
type_hint.code == DLDataTypeCode::kDLFloat)
<< "nvshmem can only allocate tensor with int, unsigned int or float data types.";
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/contrib/papi/papi.cc
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ int component_for_device(Device dev) {
/*! \brief MetricCollectorNode for PAPI metrics.
*
* PAPI (Performance Application Programming Interface) collects metrics on a
* variety of platforms including cpu, cuda and rocm.
* variety of platforms including CPU, CUDA and ROCm.
*
* PAPI is available at https://github.com/icl-utk-edu/papi.
*/
Expand Down
4 changes: 2 additions & 2 deletions src/runtime/cuda/cuda_module.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ namespace runtime {
static constexpr const int kMaxNumGPUs = 32;

/*!
* \brief create a cuda module from data.
* \brief create a CUDA module from data.
*
* \param data The module data, can be ptx, cubin
* \param fmt The format of the data, can be "ptx", "cubin"
* \param fmap The map function information map of each function.
* \param cuda_source Optional, cuda source file
* \param cuda_source Optional, CUDA source file
*/
ffi::Module CUDAModuleCreate(std::string data, std::string fmt,
ffi::Map<ffi::String, FunctionInfo> fmap, std::string cuda_source);
Expand Down
2 changes: 1 addition & 1 deletion src/s_tir/meta_schedule/postproc/rewrite_unbound_block.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class RewriteUnboundBlockNode : public PostprocNode {
public:
/*! \brief The max number of threads per block from Target */
int max_threads_per_block_ = -1;
/*! \brief The max number of threadblocks in the cuda device */
/*! \brief The max number of threadblocks in the CUDA device */
int max_threadblocks_ = -1;

static void RegisterReflection() {
Expand Down
2 changes: 1 addition & 1 deletion src/s_tir/meta_schedule/schedule_rule/auto_bind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class AutoBindNode : public ScheduleRuleNode {
public:
/*! \brief The max number of threads per block from Target */
int64_t max_threads_per_block_ = -1;
/*! \brief The max number of threadblocks in the cuda device */
/*! \brief The max number of threadblocks in the CUDA device */
int64_t max_threadblocks_ = -1;
/*! \brief thread_extents Candidates of thread axis extent. */
ffi::Array<Integer> thread_extents_;
Expand Down
2 changes: 1 addition & 1 deletion src/target/opt/build_cuda_off.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
*/

/*!
* Optional module when build cuda is switched to off
* Optional module when build CUDA is switched to off
*/
#include "../../runtime/cuda/cuda_module.h"
namespace tvm {
Expand Down
Loading