@@ -286,7 +286,16 @@ fn validate_cuda_tensor_for_encoding(
286286 let dtype_str: String = dtype. str ( ) ?. extract ( ) ?;
287287 let dtype_str_lower = dtype_str. to_ascii_lowercase ( ) ;
288288 match method. as_str ( ) {
289- "amplitude" | "angle" => {
289+ "amplitude" => {
290+ if !( dtype_str_lower. contains ( "float64" ) || dtype_str_lower. contains ( "float32" ) ) {
291+ return Err ( PyRuntimeError :: new_err ( format ! (
292+ "CUDA tensor must have dtype float64 or float32 for amplitude encoding, got {}. \
293+ Use tensor.to(torch.float64) or tensor.to(torch.float32)",
294+ dtype_str
295+ ) ) ) ;
296+ }
297+ }
298+ "angle" => {
290299 if !dtype_str_lower. contains ( "float64" ) {
291300 return Err ( PyRuntimeError :: new_err ( format ! (
292301 "CUDA tensor must have dtype float64 for {} encoding, got {}. \
@@ -642,76 +651,7 @@ impl QdpEngine {
642651 if is_pytorch_tensor ( data) ? {
643652 // Check if it's a CUDA tensor - use zero-copy GPU encoding
644653 if is_cuda_tensor ( data) ? {
645- // Validate CUDA tensor for direct GPU encoding
646- validate_cuda_tensor_for_encoding (
647- data,
648- self . engine . device ( ) . ordinal ( ) ,
649- encoding_method,
650- ) ?;
651-
652- // Extract GPU pointer directly from PyTorch tensor
653- let tensor_info = extract_cuda_tensor_info ( data) ?;
654- let stream_ptr = get_torch_cuda_stream_ptr ( data) ?;
655-
656- let ndim: usize = data. call_method0 ( "dim" ) ?. extract ( ) ?;
657-
658- match ndim {
659- 1 => {
660- // 1D CUDA tensor: single sample encoding
661- let input_len = tensor_info. shape [ 0 ] as usize ;
662- // SAFETY: tensor_info.data_ptr was obtained via PyTorch's data_ptr() from a
663- // valid CUDA tensor. The tensor remains alive during this call
664- // (held by Python's GIL), and we validated dtype/contiguity/device above.
665- let ptr = unsafe {
666- self . engine
667- . encode_from_gpu_ptr_with_stream (
668- tensor_info. data_ptr as * const std:: ffi:: c_void ,
669- input_len,
670- num_qubits,
671- encoding_method,
672- stream_ptr,
673- )
674- . map_err ( |e| {
675- PyRuntimeError :: new_err ( format ! ( "Encoding failed: {}" , e) )
676- } ) ?
677- } ;
678- return Ok ( QuantumTensor {
679- ptr,
680- consumed : false ,
681- } ) ;
682- }
683- 2 => {
684- // 2D CUDA tensor: batch encoding
685- let num_samples = tensor_info. shape [ 0 ] as usize ;
686- let sample_size = tensor_info. shape [ 1 ] as usize ;
687- // SAFETY: Same as above - pointer from validated PyTorch CUDA tensor
688- let ptr = unsafe {
689- self . engine
690- . encode_batch_from_gpu_ptr_with_stream (
691- tensor_info. data_ptr as * const std:: ffi:: c_void ,
692- num_samples,
693- sample_size,
694- num_qubits,
695- encoding_method,
696- stream_ptr,
697- )
698- . map_err ( |e| {
699- PyRuntimeError :: new_err ( format ! ( "Encoding failed: {}" , e) )
700- } ) ?
701- } ;
702- return Ok ( QuantumTensor {
703- ptr,
704- consumed : false ,
705- } ) ;
706- }
707- _ => {
708- return Err ( PyRuntimeError :: new_err ( format ! (
709- "Unsupported CUDA tensor shape: {}D. Expected 1D tensor for single \
710- sample encoding or 2D tensor (batch_size, features) for batch encoding.",
711- ndim
712- ) ) ) ;
713- }
714- }
654+ return self . _encode_from_cuda_tensor ( data, num_qubits, encoding_method) ;
715655 }
716656 // CPU PyTorch tensor path
717657 return self . encode_from_pytorch ( data, num_qubits, encoding_method) ;
@@ -1149,6 +1089,139 @@ impl QdpEngine {
11491089 } ) ?;
11501090 Ok ( PyQuantumLoader :: new ( Some ( iter) ) )
11511091 }
1092+
1093+ /// Encode directly from a PyTorch CUDA tensor. Internal helper.
1094+ ///
1095+ /// Dispatches to the core f32 GPU pointer API for 1D float32 amplitude encoding,
1096+ /// or to the float64/basis GPU pointer APIs for other dtypes and batch encoding.
1097+ ///
1098+ /// Args:
1099+ /// data: PyTorch CUDA tensor
1100+ /// num_qubits: Number of qubits
1101+ /// encoding_method: Encoding strategy (currently only "amplitude")
1102+ fn _encode_from_cuda_tensor (
1103+ & self ,
1104+ data : & Bound < ' _ , PyAny > ,
1105+ num_qubits : usize ,
1106+ encoding_method : & str ,
1107+ ) -> PyResult < QuantumTensor > {
1108+ // Validate CUDA tensor for direct GPU encoding (shape, contiguity, device, dtype)
1109+ validate_cuda_tensor_for_encoding ( data, self . engine . device ( ) . ordinal ( ) , encoding_method) ?;
1110+
1111+ // Determine dtype for dispatch (float32 vs float64, etc.).
1112+ let dtype = data. getattr ( "dtype" ) ?;
1113+ let dtype_str: String = dtype. str ( ) ?. extract ( ) ?;
1114+ let dtype_str_lower = dtype_str. to_ascii_lowercase ( ) ;
1115+ let is_f32 = dtype_str_lower. contains ( "float32" ) ;
1116+ let method = encoding_method. to_ascii_lowercase ( ) ;
1117+
1118+ // Current f32 CUDA path only supports amplitude encoding for 1D tensors.
1119+ let ndim: usize = data. call_method0 ( "dim" ) ?. extract ( ) ?;
1120+
1121+ if method. as_str ( ) == "amplitude" && is_f32 {
1122+ // NOTE: This f32 fast path intentionally bypasses `extract_cuda_tensor_info`/DLPack
1123+ // and uses PyTorch's `data_ptr()`/`numel()` directly, after
1124+ // `validate_cuda_tensor_for_encoding` has already enforced dtype/shape/contiguity/device.
1125+ // If additional validation is added to `extract_cuda_tensor_info` in the future, it must
1126+ // be mirrored here to keep behavior consistent.
1127+ match ndim {
1128+ 1 => {
1129+ // 1D CUDA tensor, float32 amplitude encoding using core f32 GPU pointer API.
1130+ let input_len: usize = data. call_method0 ( "numel" ) ?. extract ( ) ?;
1131+ let stream_ptr = get_torch_cuda_stream_ptr ( data) ?;
1132+ let data_ptr_u64: u64 = data. call_method0 ( "data_ptr" ) ?. extract ( ) ?;
1133+ let data_ptr = data_ptr_u64 as * const f32 ;
1134+
1135+ let ptr = unsafe {
1136+ self . engine
1137+ . encode_from_gpu_ptr_f32_with_stream (
1138+ data_ptr, input_len, num_qubits, stream_ptr,
1139+ )
1140+ . map_err ( |e| {
1141+ PyRuntimeError :: new_err ( format ! (
1142+ "Encoding failed (float32 amplitude): {}" ,
1143+ e
1144+ ) )
1145+ } ) ?
1146+ } ;
1147+
1148+ Ok ( QuantumTensor {
1149+ ptr,
1150+ consumed : false ,
1151+ } )
1152+ }
1153+ 2 => Err ( PyRuntimeError :: new_err (
1154+ "CUDA float32 batch amplitude encoding is not yet supported. \
1155+ Use float64 (tensor.to(torch.float64)) or encode samples individually.",
1156+ ) ) ,
1157+ _ => Err ( PyRuntimeError :: new_err ( format ! (
1158+ "Unsupported CUDA tensor shape: {}D. Expected 1D tensor for single \
1159+ sample encoding or 2D tensor (batch_size, features) for batch encoding.",
1160+ ndim
1161+ ) ) ) ,
1162+ }
1163+ } else {
1164+ // Existing float64 (and basis/int64) CUDA path using direct GPU pointer.
1165+ let tensor_info = extract_cuda_tensor_info ( data) ?;
1166+ let stream_ptr = get_torch_cuda_stream_ptr ( data) ?;
1167+
1168+ match ndim {
1169+ 1 => {
1170+ // 1D CUDA tensor: single sample encoding
1171+ let input_len = tensor_info. shape [ 0 ] as usize ;
1172+ // SAFETY: tensor_info.data_ptr was obtained via PyTorch's data_ptr() from a
1173+ // valid CUDA tensor. The tensor remains alive during this call
1174+ // (held by Python's GIL), and we validated dtype/contiguity/device above.
1175+ let ptr = unsafe {
1176+ self . engine
1177+ . encode_from_gpu_ptr_with_stream (
1178+ tensor_info. data_ptr as * const std:: ffi:: c_void ,
1179+ input_len,
1180+ num_qubits,
1181+ encoding_method,
1182+ stream_ptr,
1183+ )
1184+ . map_err ( |e| {
1185+ PyRuntimeError :: new_err ( format ! ( "Encoding failed: {}" , e) )
1186+ } ) ?
1187+ } ;
1188+ Ok ( QuantumTensor {
1189+ ptr,
1190+ consumed : false ,
1191+ } )
1192+ }
1193+ 2 => {
1194+ // 2D CUDA tensor: batch encoding
1195+ let num_samples = tensor_info. shape [ 0 ] as usize ;
1196+ let sample_size = tensor_info. shape [ 1 ] as usize ;
1197+ // SAFETY: Same as above - pointer from validated PyTorch CUDA tensor
1198+ let ptr = unsafe {
1199+ self . engine
1200+ . encode_batch_from_gpu_ptr_with_stream (
1201+ tensor_info. data_ptr as * const std:: ffi:: c_void ,
1202+ num_samples,
1203+ sample_size,
1204+ num_qubits,
1205+ encoding_method,
1206+ stream_ptr,
1207+ )
1208+ . map_err ( |e| {
1209+ PyRuntimeError :: new_err ( format ! ( "Encoding failed: {}" , e) )
1210+ } ) ?
1211+ } ;
1212+ Ok ( QuantumTensor {
1213+ ptr,
1214+ consumed : false ,
1215+ } )
1216+ }
1217+ _ => Err ( PyRuntimeError :: new_err ( format ! (
1218+ "Unsupported CUDA tensor shape: {}D. Expected 1D tensor for single \
1219+ sample encoding or 2D tensor (batch_size, features) for batch encoding.",
1220+ ndim
1221+ ) ) ) ,
1222+ }
1223+ }
1224+ }
11521225}
11531226
11541227// --- Loader bindings (Linux only; qdp-core pipeline types only built on Linux) ---
0 commit comments