diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_free_function_kernels.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_free_function_kernels.asciidoc index 5ef1c663ee431..f9d630bdb2455 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_free_function_kernels.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_free_function_kernels.asciidoc @@ -626,7 +626,7 @@ which can be used by the `kernel::get_info()` overload. _Returns:_ The same value `ret` that would be computed by: -[source,c++] +[source,c++,indent=2] ---- auto bundle = sycl::get_kernel_bundle(ctxt); @@ -664,7 +664,7 @@ The kernel `Func` must be compatible with the device `dev` as defined by _Returns:_ The same value `ret` that would be computed by: -[source,c++] +[source,c++,indent=2] ---- auto bundle = sycl::get_kernel_bundle(ctxt); @@ -683,7 +683,7 @@ a@ ---- namespace sycl::ext::oneapi::experimental { -template +template typename Param::return_type get_kernel_info(const queue& q); } // namespace sycl::ext::oneapi::experimental @@ -699,7 +699,7 @@ with `q` as defined by `is_compatible`. _Returns:_ The same value `ret` that would be computed by: -[source,c++] +[source,c++,indent=2] ---- sycl::context ctxt = q.get_context(); sycl::device dev = q.get_device(); @@ -711,6 +711,106 @@ auto ret = bundle.ext_oneapi_get_kernel().get_info(dev); _Remarks:_ Each information descriptor may specify additional preconditions, exceptions that are thrown, etc. +''' + +[frame=all,grid=none,separator="@"] +!==== +a@ +[source,c++] +---- +namespace sycl::ext::oneapi::experimental { + +template +typename Param::return_type get_kernel_info(const context& ctxt, + const device& dev, + sycl::range<1> r, + LaunchProperties props = {}, + size_t bytes = 0); + +template +typename Param::return_type get_kernel_info(const context& ctxt, + const device& dev, + sycl::range<2> r, + LaunchProperties props = {}, + size_t bytes = 0); + +template +typename Param::return_type get_kernel_info(const context& ctxt, + const device& dev, + sycl::range<3> r, + LaunchProperties props = {}, + size_t bytes = 0); + +template +typename Param::return_type get_kernel_info(const queue& q, + sycl::range<1> r, + LaunchProperties props = {}, + size_t bytes = 0); + +template +typename Param::return_type get_kernel_info(const queue& q, + sycl::range<2> r, + LaunchProperties props = {}, + size_t bytes = 0); + +template +typename Param::return_type get_kernel_info(const queue& q, + sycl::range<3> r, + LaunchProperties props = {}, + size_t bytes = 0); + +} // namespace sycl::ext::oneapi::experimental +---- +!==== + +_Constraints_: Available only if: + +* `is_kernel_v` is `true`, +* `Param` is + `ext::oneapi::experimental::info::kernel::max_num_work_groups_sync`, and +* `LaunchProperties` is a property list that contains only kernel launch + properties. + +_Preconditions_ (1) - (3): The device `dev` must be one of the devices contained +by `ctxt` or must be a descendent device of some device in `ctxt`. +The kernel `Func` must be compatible with the device `dev` as defined by +`is_compatible`. + +_Preconditions_ (4) - (6): The kernel `Func` must be compatible with the device +associated with `q` as defined by `is_compatible`. + +_Returns_ (1) - (3): The same value `ret` that would be computed by: + +[source,c++,indent=2] +---- +auto bundle = + sycl::get_kernel_bundle(ctxt); +sycl::kernel k = bundle.ext_oneapi_get_kernel(); +auto ret = k.ext_oneapi_get_info(dev, r, props, bytes); +---- + +_Returns_ (4) - (6): The same value `ret` that would be computed by: + +[source,c++,indent=2] +---- +sycl::context ctxt = q.get_context(); +sycl::device dev = q.get_device(); +auto bundle = + sycl::get_kernel_bundle(ctxt); +sycl::kernel k = bundle.ext_oneapi_get_kernel(); +auto ret = k.ext_oneapi_get_info(dev, r, props, bytes); +---- + +_Remarks:_ An implementation provides these functions only if it also implements +the link:../experimental/sycl_ext_oneapi_root_group.asciidoc[ +sycl_ext_oneapi_root_group] extension. + === Behavior with kernel bundle functions in the core SYCL specification Free function kernels that are defined by the application have a corresponding diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_root_group.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_root_group.asciidoc index 7bfcbf8155bf4..bdde83f004f30 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_root_group.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_root_group.asciidoc @@ -46,8 +46,6 @@ This extension also depends on the following other SYCL extensions: sycl_ext_oneapi_properties] * link:../experimental/sycl_ext_oneapi_kernel_properties.asciidoc[ sycl_ext_oneapi_kernel_properties] -* link:../proposed/sycl_ext_oneapi_launch_queries.asciidoc[ - sycl_ext_oneapi_launch_queries] == Status @@ -117,12 +115,6 @@ struct KernelName { sycl::group_barrier(root); } - auto get(syclex::properties_tag) const { - // Kernels that use root-group synchronization must be decorated with the - // "use_root_sync" kernel property. - return syclex::properties{syclex::use_root_sync}; - } - size_t *data; }; @@ -130,23 +122,25 @@ int main() { sycl::queue q; // When a kernel uses root-group synchronization, the total number of - // work-groups is limited. This limit can vary depending on the kernel, - // so get a "kernel" object representing the kernel when plan to launch. - auto bundle = sycl::get_kernel_bundle( - q.get_context()); - auto kernel = bundle.get_kernel(); - - // Get the maximum number of work-groups for this kernel. The limit also - // depends on the size of each work-group (which is 32 in this example), and - // on the amount of work-group local memory (which is 0 in this example). - auto maxWGs = kernel.ext_oneapi_get_info< - syclex::info::kernel_queue_specific::max_num_work_groups>(q, 32, 0); + // work-groups is limited. This limit is specific to the kernel, the device, + // the work-group size (which is 32 in this example), the launch parameters, + // and the amount of dynamically allocated work-group local memory (which is + // zero in this example). + syclex::properties props{syclex::use_root_sync}; + auto maxWGs = syclex::get_kernel_info(q, 32, props, 0); // Construct an nd-range which launches the maximum number of work-groups. auto ndr = sycl::nd_range<1>{maxWGs * 32, 32}; + // When a kernel uses root-group synchronization, it must be launched with the + // "use_root_sync" property. Construct a launch configuration with this + // property. + syclex::launch_config cfg{ndr, props}; + size_t *data = sycl::malloc_device(maxWGs * 32, q); - q.parallel_for(ndr, KernelName{data}).wait(); + syclex::nd_launch(q, cfg, KernelName{data}); + q.wait(); } ---- @@ -174,7 +168,183 @@ supports. |=== -=== Kernel properties +=== Kernel information descriptor + +This extension adds the following kernel information descriptor. + +[source,c++] +---- +namespace sycl::ext::oneapi::experimental::info::kernel { + +struct max_num_work_groups_sync { + using return_type = size_t; +} + +} // namespace sycl::ext::oneapi::experimental +---- + + +=== Kernel information descriptor queries + +This extension adds the following member functions to the `kernel` class. + +[source,c++] +---- +namespace sycl { + +class kernel { + public: + template + typename Param::return_type ext_oneapi_get_info(const device& dev, + sycl::range<1> r, + LaunchProperties props = {}, + size_t bytes = 0) const; + + template + typename Param::return_type ext_oneapi_get_info(const device& dev, + sycl::range<2> r, + LaunchProperties props = {}, + size_t bytes = 0) const; + + template + typename Param::return_type ext_oneapi_get_info(const device& dev, + sycl::range<3> r, + LaunchProperties props = {}, + size_t bytes = 0) const; +}; + +} // namespace sycl +---- + +_Constraints:_ + +* `Param` is + `ext::oneapi::experimental::info::kernel::max_num_work_groups_sync`, and +* `LaunchProperties` is a property list that contains only kernel launch + properties. + +_Returns:_ The maximum number of work-groups that are allowed when the kernel +uses root-group synchronization, assuming the kernel is launched with a +work-group size of `r`, kernel launch properties `props`, and `bytes` bytes of +dynamic work-group local memory. +If the kernel can be submitted to the device without an error, the minimum value +returned by this query is 1, otherwise it is 0. + +If the kernel uses +link:../experimental/sycl_ext_oneapi_work_group_scratch_memory.asciidoc[ +sycl_ext_oneapi_work_group_scratch_memory] to allocate dynamic work-group local +memory, the amount of that memory can be specified either by including +`work_group_scratch_size` in `props` or by specifying the size via `bytes`. +It should only be specified in one of these ways, though, because the query adds +these value together to get the total amount of dynamic work-group local memory. + +_Remarks:_ This query implicitly assumes that the kernel will be launched with +the `use_root_sync` kernel launch property. +It is not necessary to include this property in `props`, but there is no harm in +doing so. + +''' + +This extension also adds the following shortcut functions to query a kernel's +information descriptor. + +[source,c++] +---- +namespace sycl::ext::oneapi::experimental { + +template +typename Param::return_type get_kernel_info(const context& ctxt, + const device& dev, + sycl::range<1> r, + LaunchProperties props = {}, + size_t bytes = 0); + +template +typename Param::return_type get_kernel_info(const context& ctxt, + const device& dev, + sycl::range<2> r, + LaunchProperties props = {}, + size_t bytes = 0); + +template +typename Param::return_type get_kernel_info(const context& ctxt, + const device& dev, + sycl::range<3> r, + LaunchProperties props = {}, + size_t bytes = 0); + +template +typename Param::return_type get_kernel_info(const queue& q, + sycl::range<1> r, + LaunchProperties props = {}, + size_t bytes = 0); + +template +typename Param::return_type get_kernel_info(const queue& q, + sycl::range<2> r, + LaunchProperties props = {}, + size_t bytes = 0); + +template +typename Param::return_type get_kernel_info(const queue& q, + sycl::range<3> r, + LaunchProperties props = {}, + size_t bytes = 0); + +} // namespace sycl::ext::oneapi::experimental +---- + +_Constraints:_ + +* `Param` is + `ext::oneapi::experimental::info::kernel::max_num_work_groups_sync`, and +* `LaunchProperties` is a property list that contains only kernel launch + properties. + +_Preconditions_ (1) - (3): The `KernelName` must be the type kernel name of a +kernel that is defined in the application. +The device `dev` must be one of the devices contained by `ctxt` or must be a +descendent device of some device in `ctxt`. +The kernel `KernelName` must be compatible with the device `dev` as defined by +`is_compatible`. + +_Preconditions_ (4) - (6): The `KernelName` must be the type kernel name of a +kernel that is defined in the application. +The kernel `KernelName` must be compatible with the device associated with `q` +as defined by `is_compatible`. + +_Returns_ (1) - (3): The same value `ret` that would be computed by: + +[source,c++,indent=2] +---- +auto bundle = + sycl::get_kernel_bundle(ctxt); +sycl::kernel k = bundle.get_kernel(); +auto ret = k.ext_oneapi_get_info(dev, r, props, bytes); +---- + +_Returns_ (4) - (6): The same value `ret` that would be computed by: + +[source,c++,indent=2] +---- +sycl::context ctxt = q.get_context(); +sycl::device dev = q.get_device(); +auto bundle = + sycl::get_kernel_bundle(ctxt); +sycl::kernel k = bundle.get_kernel(); +auto ret = k.ext_oneapi_get_info(dev, r, props, bytes); +---- + + +=== Kernel launch property + +This extension adds the following kernel launch property. [source,c++] ---- @@ -195,16 +365,16 @@ inline constexpr use_root_sync_key::value_t use_root_sync; |The `use_root_sync` property adds the requirement that the kernel must be launched in a manner that is compatible with using a root-group in group functions and algorithms. If the `sycl::nd_range` parameter used to launch the - kernel is incompatible with the results of the launch queries described in the - sycl_ext_oneapi_launch_queries extension, an implementation must throw a - synchronous exception with the `errc::nd_range` error code. + kernel is incompatible with the maximum number of work-groups returned by the + `max_num_work_groups_sync` query, an implementation must throw a synchronous + exception with the `errc::nd_range` error code. |=== === The `root_group` class The `root_group` class implements all member functions common to the -`sycl::group` and `sycl::sub_group` classes and also contains own +`sycl::group` and `sycl::sub_group` classes and also contains its own additional functions. [source,c++] @@ -436,13 +606,13 @@ NOTE: Support for passing the `root_group` to other group functions and algorithms may be added in a future version of this extension. These group functions and algorithms act as synchronization points, and can -only be used in kernels decorated with the `use_root_sync` property. -Attempting to call these functions in kernels that were not decorated with the +only be used in kernels launched with the `use_root_sync` property. +Attempting to call these functions in kernels that were not launched with the `use_root_sync` property results in undefined behavior. NOTE: Implementations are encouraged to throw a synchronous error with the `errc::invalid` error code if they are able to detect that a developer has -attempted to synchronize a `root_group` from an incompatible kernel. +attempted to synchronize a `root_group` from an incompatible kernel launch. === Accessing the `root_group` instance