Skip to content
Open
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,10 @@ static constexpr Property<uint64_t, PropertyMutability::RW> dynamic_quantization
"GPU_DYNAMIC_QUANTIZATION_GROUP_SIZE_MAX"};

/**
* @brief Turning on this key bypasses the device max allocation size check and switches
* addressing mode to allow allocations larger than 4GB as described here:
* https://github.com/intel/compute-runtime/blob/master/programmers-guide/ALLOCATIONS_GREATER_THAN_4GB.md#creating-allocations-greater-than-4GB
* This is also useful when a single buffer exceeds the device's CL_DEVICE_MAX_MEM_ALLOC_SIZE.
* Note: Performance may be lower with this option enabled.
* @ingroup ov_runtime_ocl_gpu_prop_cpp_api
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hin
// Release-option registrations for the GPU plugin configuration.
// Each entry passes, in order: a namespace, an option name, a value (presumably the
// option's default - confirm against the OV_CONFIG_RELEASE_OPTION definition), and a
// human-readable description string (may be empty).
// NOTE(review): enable_large_allocations is registered twice below with different
// description strings; this looks like the before/after pair of a single change -
// confirm that only one entry is intended to remain.
OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls queue priority property")
OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_sdpa_optimization, true, "Enable/Disable fused SDPA primitive execution")
OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_lora_operation, true, "Enable/Disable LoRA operation. The separate operation is less versatile, but has better performance")
OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_large_allocations, false, "Enable/Disable large buffer allocations (>4gb). Enabling this option may lead to performance degradation")
OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_large_allocations, false, "Allow buffer allocations that exceed the device max allocation size. Enabling this option may lead to performance degradation")
OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, enable_loop_unrolling, true, "Enable/Disable Loop/TensorIterator operation unrolling")
OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, disable_winograd_convolution, false, "Enable/Disable winograd convolution implementation if available")
OV_CONFIG_RELEASE_OPTION(ov::internal, exclusive_async_requests, false, "")
Expand Down
5 changes: 3 additions & 2 deletions src/plugins/intel_gpu/src/runtime/engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,9 @@ bool engine::check_allocatable(const layout& layout, allocation_type type) {
OPENVINO_ASSERT(!exceed_allocatable_mem_size,
"[GPU] Exceeded max size of memory object allocation: ",
"requested ", layout.bytes_count(), " bytes, "
"but max alloc size supported by device is ", get_device_info().max_alloc_mem_size, " bytes.",
"Please try to reduce batch size or use lower precision.");
"but max alloc size supported by device is ", get_device_info().max_alloc_mem_size, " bytes. ",
"Please try to reduce batch size, use lower precision, "
"or set GPU_ENABLE_LARGE_ALLOCATIONS property to true.");
}

auto used_mem = get_used_device_memory(allocation_type::usm_device) + get_used_device_memory(allocation_type::usm_host);
Expand Down
Loading