openvinotoolkit · Sahilbhatane · Mar 23, 2026 · Mar 23, 2026 · Mar 23, 2026 · Mar 23, 2026
@@ -159,9 +159,10 @@ static constexpr Property<uint64_t, PropertyMutability::RW> dynamic_quantization
     "GPU_DYNAMIC_QUANTIZATION_GROUP_SIZE_MAX"};
 
 /**
- * @brief Turning on this key switches addressing mode to allow allocations larger than 4GB
- * as described here:
+ * @brief Turning on this key bypasses the device max allocation size check and switches
+ * addressing mode to allow allocations larger than 4GB as described here:
  * https://github.com/intel/compute-runtime/blob/master/programmers-guide/ALLOCATIONS_GREATER_THAN_4GB.md#creating-allocations-greater-than-4GB
+ * Use it when a single buffer exceeds the device's CL_DEVICE_MAX_MEM_ALLOC_SIZE.
  * Note: Performance may be lower with this option enabled.
  * @ingroup ov_runtime_ocl_gpu_prop_cpp_api
  */

@@ -20,7 +20,7 @@ OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_throttle, ov::intel_gpu::hin
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, queue_priority, ov::hint::Priority::MEDIUM, "Low-level hint that controls queue priority property")
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_sdpa_optimization, true, "Enable/Disable fused SDPA primitive execution")
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_lora_operation, true, "Enable/Disable LoRA operation. The separate operation is less versatile, but has better performance")
-OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_large_allocations, false, "Enable/Disable large buffer allocations (>4gb). Enabling this option may lead to performance degradation")
+OV_CONFIG_RELEASE_OPTION(ov::intel_gpu::hint, enable_large_allocations, false, "Allow buffer allocations that exceed the device max allocation size. Enabling this option may lead to performance degradation")
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, enable_loop_unrolling, true, "Enable/Disable Loop/TensorIterator operation unrolling")
 OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, disable_winograd_convolution, false, "Enable/Disable winograd convolution implementation if available")
 OV_CONFIG_RELEASE_OPTION(ov::internal, exclusive_async_requests, false, "")

@@ -309,8 +309,9 @@ bool engine::check_allocatable(const layout& layout, allocation_type type) {
         OPENVINO_ASSERT(!exceed_allocatable_mem_size,
                         "[GPU] Exceeded max size of memory object allocation: ",
                         "requested ", layout.bytes_count(), " bytes, "
-                        "but max alloc size supported by device is ", get_device_info().max_alloc_mem_size, " bytes.",
-                        "Please try to reduce batch size or use lower precision.");
+                        "but max alloc size supported by device is ", get_device_info().max_alloc_mem_size, " bytes. ",
+                        "Please try to reduce batch size, use lower precision, "
+                        "or set GPU_ENABLE_LARGE_ALLOCATIONS property to true.");
     }
 
     auto used_mem = get_used_device_memory(allocation_type::usm_device) + get_used_device_memory(allocation_type::usm_host);