Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,3 @@ The table below lists the soon to be removed deprecated descriptors and their re

* Note: *N* can take the value 1, 2, or 3.


## Implementation

### Consistency with existing checks

The implementation already checks, when enqueuing a kernel, that both the global and the per-dimension work-group counts are smaller than `std::numeric_limits<int>::max`. This check is implemented in `sycl/include/sycl/handler.hpp`. For consistency, the values returned by the two device descriptors are bounded by this limit.

### Example of returned values

- If the device is the host or has an OpenCL back-end, the queries are not applicable, so the values returned are the maximum values accepted at kernel submission (see `sycl/include/sycl/handler.hpp`), which are currently `std::numeric_limits<int>::max`.
- CUDA: Back-end query using `CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_[X,Y,Z]`.
4 changes: 3 additions & 1 deletion sycl/include/sycl/info/ext_oneapi_device_traits.def
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
#define __SYCL_PARAM_TRAITS_TEMPLATE_SPEC __SYCL_PARAM_TRAITS_SPEC
#endif

__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental,device, max_global_work_groups, size_t, __SYCL_TRAIT_HANDLED_IN_RT)
__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, device,
max_global_work_groups, size_t,
UR_DEVICE_INFO_MAX_WORK_GROUPS)
__SYCL_PARAM_TRAITS_TEMPLATE_SPEC(ext::oneapi::experimental,device, max_work_groups<1>, id<1>, __SYCL_TRAIT_HANDLED_IN_RT)
__SYCL_PARAM_TRAITS_TEMPLATE_SPEC(ext::oneapi::experimental,device, max_work_groups<2>, id<2>, __SYCL_TRAIT_HANDLED_IN_RT)
__SYCL_PARAM_TRAITS_TEMPLATE_SPEC(ext::oneapi::experimental,device, max_work_groups<3>, id<3>, UR_DEVICE_INFO_MAX_WORK_GROUPS_3D)
Expand Down
10 changes: 2 additions & 8 deletions sycl/source/detail/device_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -892,20 +892,14 @@ class device_impl {
// ext_oneapi_device_traits.def

CASE(ext::oneapi::experimental::info::device::max_global_work_groups) {
return static_cast<size_t>((std::numeric_limits<int>::max)());
return get_info_impl<UR_DEVICE_INFO_MAX_WORK_GROUPS>();
}
CASE(ext::oneapi::experimental::info::device::max_work_groups<3>) {
size_t Limit = get_info<
ext::oneapi::experimental::info::device::max_global_work_groups,
DependentFalse>();

// TODO: std::array<size_t, 3> ?
size_t result[3];
getAdapter().call<UrApiKind::urDeviceGetInfo>(
getHandleRef(), UR_DEVICE_INFO_MAX_WORK_GROUPS_3D, sizeof(result),
&result, nullptr);
return id<3>(std::min(Limit, result[2]), std::min(Limit, result[1]),
std::min(Limit, result[0]));
return id<3>(result[2], result[1], result[0]);
Comment thread
uditagarwal97 marked this conversation as resolved.
}
CASE(ext::oneapi::experimental::info::device::max_work_groups<2>) {
id<3> max_3d =
Expand Down
1 change: 1 addition & 0 deletions sycl/source/detail/ur_device_info_ret_types.inc
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ MAP(UR_DEVICE_INFO_KERNEL_LAUNCH_CAPABILITIES, ur_kernel_launch_properties_flags
// Manually changed std::vector<uint8_t> -> std::array<uint8_t, 8>
MAP(UR_DEVICE_INFO_LUID, std::array<uint8_t, 8>)
MAP(UR_DEVICE_INFO_NODE_MASK, uint32_t)
MAP(UR_DEVICE_INFO_MAX_WORK_GROUPS, size_t)

// These aren't present in the specification, extracted from ur_api.h
// instead.
Expand Down
2 changes: 2 additions & 0 deletions unified-runtime/include/unified-runtime/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -2378,6 +2378,8 @@ typedef enum ur_device_info_t {
UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG_LONG = 131,
/// [uint32_t] native vector width for long long
UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG_LONG = 132,
/// [size_t] return max total number of work groups
UR_DEVICE_INFO_MAX_WORK_GROUPS = 133,
/// [::ur_bool_t] Returns true if the device supports the use of
/// command-buffers.
UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000,
Expand Down
16 changes: 16 additions & 0 deletions unified-runtime/include/unified-runtime/ur_print.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3178,6 +3178,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG_LONG:
os << "UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG_LONG";
break;
case UR_DEVICE_INFO_MAX_WORK_GROUPS:
os << "UR_DEVICE_INFO_MAX_WORK_GROUPS";
break;
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP";
break;
Expand Down Expand Up @@ -5018,6 +5021,19 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr,

os << ")";
} break;
case UR_DEVICE_INFO_MAX_WORK_GROUPS: {
const size_t *tptr = (const size_t *)ptr;
Comment thread
uditagarwal97 marked this conversation as resolved.
if (sizeof(size_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(size_t)
<< ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

os << *tptr;

os << ")";
} break;
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: {
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
if (sizeof(ur_bool_t) > size) {
Expand Down
2 changes: 2 additions & 0 deletions unified-runtime/scripts/core/device.yml
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,8 @@ etors:
desc: "[uint32_t] preferred vector width for long long"
- name: NATIVE_VECTOR_WIDTH_LONG_LONG
desc: "[uint32_t] native vector width for long long"
- name: MAX_WORK_GROUPS
desc: "[size_t] return max total number of work groups"
--- #--------------------------------------------------------------------------
type: function
desc: "Retrieves various information about device"
Expand Down
19 changes: 19 additions & 0 deletions unified-runtime/source/adapters/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
return ReturnValue(ReturnSizes);
}

case UR_DEVICE_INFO_MAX_WORK_GROUPS: {
int MaxX = 0, MaxY = 0, MaxZ = 0;
UR_CHECK_ERROR(cuDeviceGetAttribute(
&MaxX, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, hDevice->get()));
assert(MaxX >= 0);

UR_CHECK_ERROR(cuDeviceGetAttribute(
&MaxY, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, hDevice->get()));
assert(MaxY >= 0);

UR_CHECK_ERROR(cuDeviceGetAttribute(
&MaxZ, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, hDevice->get()));
assert(MaxZ >= 0);

return ReturnValue(multiplyWithOverflowCheck(static_cast<size_t>(MaxX),
static_cast<size_t>(MaxY),
static_cast<size_t>(MaxZ)));
}

case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: {
int MaxWorkGroupSize = 0;
UR_CHECK_ERROR(cuDeviceGetAttribute(
Expand Down
19 changes: 19 additions & 0 deletions unified-runtime/source/adapters/hip/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
return ReturnValue(return_sizes);
}

case UR_DEVICE_INFO_MAX_WORK_GROUPS: {
int MaxX = 0, MaxY = 0, MaxZ = 0;
UR_CHECK_ERROR(hipDeviceGetAttribute(&MaxX, hipDeviceAttributeMaxGridDimX,
hDevice->get()));
assert(MaxX >= 0);

UR_CHECK_ERROR(hipDeviceGetAttribute(&MaxY, hipDeviceAttributeMaxGridDimY,
hDevice->get()));
assert(MaxY >= 0);

UR_CHECK_ERROR(hipDeviceGetAttribute(&MaxZ, hipDeviceAttributeMaxGridDimZ,
hDevice->get()));
assert(MaxZ >= 0);

return ReturnValue(multiplyWithOverflowCheck(static_cast<size_t>(MaxX),
static_cast<size_t>(MaxY),
static_cast<size_t>(MaxZ)));
}

case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: {
int MaxWorkGroupSize = 0;
UR_CHECK_ERROR(hipDeviceGetAttribute(&MaxWorkGroupSize,
Expand Down
8 changes: 8 additions & 0 deletions unified-runtime/source/adapters/level_zero/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,14 @@ ur_result_t urDeviceGetInfo(
Device->ZeDeviceComputeProperties->maxGroupCountZ}};
return ReturnValue(MaxGroupCounts);
}
case UR_DEVICE_INFO_MAX_WORK_GROUPS: {
// Multiply the max group counts in each dimension to get the total max
// number of work groups. Prevent overflow.
return ReturnValue(multiplyWithOverflowCheck(
Device->ZeDeviceComputeProperties->maxGroupCountX,
Device->ZeDeviceComputeProperties->maxGroupCountY,
Device->ZeDeviceComputeProperties->maxGroupCountZ));
}
case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY:
return ReturnValue(uint32_t{Device->ZeDeviceProperties->coreClockRate});
case UR_DEVICE_INFO_ADDRESS_BITS: {
Expand Down
1 change: 1 addition & 0 deletions unified-runtime/source/adapters/native_cpu/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
case UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS:
case UR_DEVICE_INFO_IL_VERSION:
case UR_DEVICE_INFO_MAX_WORK_GROUPS_3D:
case UR_DEVICE_INFO_MAX_WORK_GROUPS:
case UR_DEVICE_INFO_MEMORY_CLOCK_RATE:
case UR_DEVICE_INFO_MEMORY_BUS_WIDTH:
case UR_DEVICE_INFO_GLOBAL_MEM_FREE:
Expand Down
23 changes: 23 additions & 0 deletions unified-runtime/source/adapters/offload/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
//===----------------------------------------------------------------------===//

#include <OffloadAPI.h>
#include <limits>
#include <unified-runtime/ur_api.h>
#include <ur/ur.hpp>

Expand Down Expand Up @@ -212,6 +213,28 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,

return UR_RESULT_SUCCESS;
}
case UR_DEVICE_INFO_MAX_WORK_GROUPS: {
// OL dimensions are uint32_t while UR is size_t, so they need to be mapped.
if (pPropSizeRet) {
*pPropSizeRet = sizeof(size_t);
}

if (pPropValue) {
ol_dimensions_t olVec;
OL_RETURN_ON_ERR(olGetDeviceInfo(
hDevice->OffloadDevice, OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION,
sizeof(olVec), &olVec));

// Multiply the max group counts in each dimension to get the total max
// number of work groups. Prevent overflow.
*reinterpret_cast<size_t *>(pPropValue) = multiplyWithOverflowCheck(
*reinterpret_cast<size_t *>(pPropValue) = multiplyWithOverflowCheck(
static_cast<size_t>(olVec.x), static_cast<size_t>(olVec.y),
static_cast<size_t>(olVec.z));
}

return UR_RESULT_SUCCESS;
}

// Unimplemented features
case UR_DEVICE_INFO_PROGRAM_SET_SPECIALIZATION_CONSTANTS:
Expand Down
3 changes: 3 additions & 0 deletions unified-runtime/source/adapters/opencl/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
ReturnSizes.sizes[2] = Max;
return ReturnValue(ReturnSizes);
}
case UR_DEVICE_INFO_MAX_WORK_GROUPS: {
return ReturnValue(std::numeric_limits<size_t>::max());
Comment thread
uditagarwal97 marked this conversation as resolved.
}
case UR_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: {
return ReturnValue(static_cast<uint32_t>(1u));
}
Expand Down
31 changes: 31 additions & 0 deletions unified-runtime/source/common/ur_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,37 @@ inline ur_result_t exceptionToResult(std::exception_ptr eptr) {
}
}

// Multiplies a, b and c, saturating on overflow.
//
// Returns:
//   - 0 if any of the factors is 0;
//   - the exact product a * b * c if it is representable in size_t;
//   - the maximum size_t value if either intermediate multiplication
//     (a * b, then the result times c) would overflow.
inline size_t multiplyWithOverflowCheck(size_t a, size_t b, size_t c) {
  // The call is parenthesised so that a function-like `max` macro (as some
  // platform headers define) cannot expand here.
  constexpr size_t MaxSizeTVal = (std::numeric_limits<size_t>::max)();

  // A zero factor makes the product zero. Returning early also guarantees
  // that the divisors used by the fallback path below are non-zero.
  if (a == 0 || b == 0 || c == 0) {
    return 0;
  }

  size_t Product = 0;

#ifndef _MSC_VER
  // The builtin performs the multiplication and reports whether it
  // overflowed; the second call reuses Product as both input and output.
  if (__builtin_mul_overflow(a, b, &Product) ||
      __builtin_mul_overflow(Product, c, &Product)) {
    return MaxSizeTVal; // Overflow occurred, return max possible value.
  }
#else
  // Fallback without builtins: for x != 0, x * y overflows exactly when
  // y > max / x.
  if (b > MaxSizeTVal / a) {
    return MaxSizeTVal; // Overflow occurred, return max possible value.
  }
  Product = a * b;

  if (c > MaxSizeTVal / Product) {
    return MaxSizeTVal; // Overflow occurred, return max possible value.
  }
  Product *= c;
#endif

  return Product;
}

template <class> inline constexpr bool ur_always_false_t = false;

namespace {
Expand Down
19 changes: 18 additions & 1 deletion unified-runtime/test/conformance/device/urDeviceGetInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1848,6 +1848,23 @@ TEST_P(urDeviceGetInfoTest, SuccessMemoryBusWidth) {
property_value);
}

// Conformance check for UR_DEVICE_INFO_MAX_WORK_GROUPS: the query must report
// a property of size sizeof(size_t) and a value greater than zero. Marked as
// a known failure for uur::NativeCPU.
TEST_P(urDeviceGetInfoTest, SuccessMaxGlobalWorkGroups) {
  UUR_KNOWN_FAILURE_ON(uur::NativeCPU{});

  const ur_device_info_t property_name = UR_DEVICE_INFO_MAX_WORK_GROUPS;

  // Size-only query: no output buffer, just the reported property size.
  size_t property_size = 0;
  ASSERT_SUCCESS(
      urDeviceGetInfo(device, property_name, 0, nullptr, &property_size));
  ASSERT_EQ(property_size, sizeof(size_t));

  // Value query: read the property into a size_t-sized buffer.
  size_t total_work_groups = 0;
  ASSERT_SUCCESS(urDeviceGetInfo(device, property_name,
                                 sizeof(total_work_groups),
                                 &total_work_groups, nullptr));
  ASSERT_GT(total_work_groups, 0u);
}

TEST_P(urDeviceGetInfoTest, SuccessMaxWorkGroups3D) {
UUR_KNOWN_FAILURE_ON(uur::NativeCPU{});

Expand All @@ -1859,7 +1876,7 @@ TEST_P(urDeviceGetInfoTest, SuccessMaxWorkGroups3D) {
ASSERT_EQ(property_size, sizeof(size_t) * 3);

std::array<size_t, 3> max_work_group_sizes = {};
ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_MAX_WORK_GROUPS_3D,
ASSERT_SUCCESS(urDeviceGetInfo(device, property_name,
sizeof(max_work_group_sizes),
max_work_group_sizes.data(), nullptr));
for (size_t i = 0; i < 3; i++) {
Expand Down
2 changes: 2 additions & 0 deletions unified-runtime/tools/urinfo/urinfo.hpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading