Skip to content

Commit 640c160

Browse files
authored
[QDP] Support float32 CUDA amplitude encoding in Python bindings (#1025)
* feat: add direct encoding method for float32 tensors * fix: fix rebase error * feat: removes redundant checks & adds notes
1 parent 341b1d6 commit 640c160

File tree

3 files changed

+193
-88
lines changed

3 files changed

+193
-88
lines changed

qdp/qdp-python/src/lib.rs

Lines changed: 144 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,16 @@ fn validate_cuda_tensor_for_encoding(
286286
let dtype_str: String = dtype.str()?.extract()?;
287287
let dtype_str_lower = dtype_str.to_ascii_lowercase();
288288
match method.as_str() {
289-
"amplitude" | "angle" => {
289+
"amplitude" => {
290+
if !(dtype_str_lower.contains("float64") || dtype_str_lower.contains("float32")) {
291+
return Err(PyRuntimeError::new_err(format!(
292+
"CUDA tensor must have dtype float64 or float32 for amplitude encoding, got {}. \
293+
Use tensor.to(torch.float64) or tensor.to(torch.float32)",
294+
dtype_str
295+
)));
296+
}
297+
}
298+
"angle" => {
290299
if !dtype_str_lower.contains("float64") {
291300
return Err(PyRuntimeError::new_err(format!(
292301
"CUDA tensor must have dtype float64 for {} encoding, got {}. \
@@ -642,76 +651,7 @@ impl QdpEngine {
642651
if is_pytorch_tensor(data)? {
643652
// Check if it's a CUDA tensor - use zero-copy GPU encoding
644653
if is_cuda_tensor(data)? {
645-
// Validate CUDA tensor for direct GPU encoding
646-
validate_cuda_tensor_for_encoding(
647-
data,
648-
self.engine.device().ordinal(),
649-
encoding_method,
650-
)?;
651-
652-
// Extract GPU pointer directly from PyTorch tensor
653-
let tensor_info = extract_cuda_tensor_info(data)?;
654-
let stream_ptr = get_torch_cuda_stream_ptr(data)?;
655-
656-
let ndim: usize = data.call_method0("dim")?.extract()?;
657-
658-
match ndim {
659-
1 => {
660-
// 1D CUDA tensor: single sample encoding
661-
let input_len = tensor_info.shape[0] as usize;
662-
// SAFETY: tensor_info.data_ptr was obtained via PyTorch's data_ptr() from a
663-
// valid CUDA tensor. The tensor remains alive during this call
664-
// (held by Python's GIL), and we validated dtype/contiguity/device above.
665-
let ptr = unsafe {
666-
self.engine
667-
.encode_from_gpu_ptr_with_stream(
668-
tensor_info.data_ptr as *const std::ffi::c_void,
669-
input_len,
670-
num_qubits,
671-
encoding_method,
672-
stream_ptr,
673-
)
674-
.map_err(|e| {
675-
PyRuntimeError::new_err(format!("Encoding failed: {}", e))
676-
})?
677-
};
678-
return Ok(QuantumTensor {
679-
ptr,
680-
consumed: false,
681-
});
682-
}
683-
2 => {
684-
// 2D CUDA tensor: batch encoding
685-
let num_samples = tensor_info.shape[0] as usize;
686-
let sample_size = tensor_info.shape[1] as usize;
687-
// SAFETY: Same as above - pointer from validated PyTorch CUDA tensor
688-
let ptr = unsafe {
689-
self.engine
690-
.encode_batch_from_gpu_ptr_with_stream(
691-
tensor_info.data_ptr as *const std::ffi::c_void,
692-
num_samples,
693-
sample_size,
694-
num_qubits,
695-
encoding_method,
696-
stream_ptr,
697-
)
698-
.map_err(|e| {
699-
PyRuntimeError::new_err(format!("Encoding failed: {}", e))
700-
})?
701-
};
702-
return Ok(QuantumTensor {
703-
ptr,
704-
consumed: false,
705-
});
706-
}
707-
_ => {
708-
return Err(PyRuntimeError::new_err(format!(
709-
"Unsupported CUDA tensor shape: {}D. Expected 1D tensor for single \
710-
sample encoding or 2D tensor (batch_size, features) for batch encoding.",
711-
ndim
712-
)));
713-
}
714-
}
654+
return self._encode_from_cuda_tensor(data, num_qubits, encoding_method);
715655
}
716656
// CPU PyTorch tensor path
717657
return self.encode_from_pytorch(data, num_qubits, encoding_method);
@@ -1149,6 +1089,139 @@ impl QdpEngine {
11491089
})?;
11501090
Ok(PyQuantumLoader::new(Some(iter)))
11511091
}
1092+
1093+
/// Encode directly from a PyTorch CUDA tensor. Internal helper.
1094+
///
1095+
/// Dispatches to the core f32 GPU pointer API for 1D float32 amplitude encoding,
1096+
/// or to the float64/basis GPU pointer APIs for other dtypes and batch encoding.
1097+
///
1098+
/// Args:
1099+
/// data: PyTorch CUDA tensor
1100+
/// num_qubits: Number of qubits
1101+
/// encoding_method: Encoding strategy (currently only "amplitude")
1102+
fn _encode_from_cuda_tensor(
1103+
&self,
1104+
data: &Bound<'_, PyAny>,
1105+
num_qubits: usize,
1106+
encoding_method: &str,
1107+
) -> PyResult<QuantumTensor> {
1108+
// Validate CUDA tensor for direct GPU encoding (shape, contiguity, device, dtype)
1109+
validate_cuda_tensor_for_encoding(data, self.engine.device().ordinal(), encoding_method)?;
1110+
1111+
// Determine dtype for dispatch (float32 vs float64, etc.).
1112+
let dtype = data.getattr("dtype")?;
1113+
let dtype_str: String = dtype.str()?.extract()?;
1114+
let dtype_str_lower = dtype_str.to_ascii_lowercase();
1115+
let is_f32 = dtype_str_lower.contains("float32");
1116+
let method = encoding_method.to_ascii_lowercase();
1117+
1118+
// Current f32 CUDA path only supports amplitude encoding for 1D tensors.
1119+
let ndim: usize = data.call_method0("dim")?.extract()?;
1120+
1121+
if method.as_str() == "amplitude" && is_f32 {
1122+
// NOTE: This f32 fast path intentionally bypasses `extract_cuda_tensor_info`/DLPack
1123+
// and uses PyTorch's `data_ptr()`/`numel()` directly, after
1124+
// `validate_cuda_tensor_for_encoding` has already enforced dtype/shape/contiguity/device.
1125+
// If additional validation is added to `extract_cuda_tensor_info` in the future, it must
1126+
// be mirrored here to keep behavior consistent.
1127+
match ndim {
1128+
1 => {
1129+
// 1D CUDA tensor, float32 amplitude encoding using core f32 GPU pointer API.
1130+
let input_len: usize = data.call_method0("numel")?.extract()?;
1131+
let stream_ptr = get_torch_cuda_stream_ptr(data)?;
1132+
let data_ptr_u64: u64 = data.call_method0("data_ptr")?.extract()?;
1133+
let data_ptr = data_ptr_u64 as *const f32;
1134+
1135+
let ptr = unsafe {
1136+
self.engine
1137+
.encode_from_gpu_ptr_f32_with_stream(
1138+
data_ptr, input_len, num_qubits, stream_ptr,
1139+
)
1140+
.map_err(|e| {
1141+
PyRuntimeError::new_err(format!(
1142+
"Encoding failed (float32 amplitude): {}",
1143+
e
1144+
))
1145+
})?
1146+
};
1147+
1148+
Ok(QuantumTensor {
1149+
ptr,
1150+
consumed: false,
1151+
})
1152+
}
1153+
2 => Err(PyRuntimeError::new_err(
1154+
"CUDA float32 batch amplitude encoding is not yet supported. \
1155+
Use float64 (tensor.to(torch.float64)) or encode samples individually.",
1156+
)),
1157+
_ => Err(PyRuntimeError::new_err(format!(
1158+
"Unsupported CUDA tensor shape: {}D. Expected 1D tensor for single \
1159+
sample encoding or 2D tensor (batch_size, features) for batch encoding.",
1160+
ndim
1161+
))),
1162+
}
1163+
} else {
1164+
// Existing float64 (and basis/int64) CUDA path using direct GPU pointer.
1165+
let tensor_info = extract_cuda_tensor_info(data)?;
1166+
let stream_ptr = get_torch_cuda_stream_ptr(data)?;
1167+
1168+
match ndim {
1169+
1 => {
1170+
// 1D CUDA tensor: single sample encoding
1171+
let input_len = tensor_info.shape[0] as usize;
1172+
// SAFETY: tensor_info.data_ptr was obtained via PyTorch's data_ptr() from a
1173+
// valid CUDA tensor. The tensor remains alive during this call
1174+
// (held by Python's GIL), and we validated dtype/contiguity/device above.
1175+
let ptr = unsafe {
1176+
self.engine
1177+
.encode_from_gpu_ptr_with_stream(
1178+
tensor_info.data_ptr as *const std::ffi::c_void,
1179+
input_len,
1180+
num_qubits,
1181+
encoding_method,
1182+
stream_ptr,
1183+
)
1184+
.map_err(|e| {
1185+
PyRuntimeError::new_err(format!("Encoding failed: {}", e))
1186+
})?
1187+
};
1188+
Ok(QuantumTensor {
1189+
ptr,
1190+
consumed: false,
1191+
})
1192+
}
1193+
2 => {
1194+
// 2D CUDA tensor: batch encoding
1195+
let num_samples = tensor_info.shape[0] as usize;
1196+
let sample_size = tensor_info.shape[1] as usize;
1197+
// SAFETY: Same as above - pointer from validated PyTorch CUDA tensor
1198+
let ptr = unsafe {
1199+
self.engine
1200+
.encode_batch_from_gpu_ptr_with_stream(
1201+
tensor_info.data_ptr as *const std::ffi::c_void,
1202+
num_samples,
1203+
sample_size,
1204+
num_qubits,
1205+
encoding_method,
1206+
stream_ptr,
1207+
)
1208+
.map_err(|e| {
1209+
PyRuntimeError::new_err(format!("Encoding failed: {}", e))
1210+
})?
1211+
};
1212+
Ok(QuantumTensor {
1213+
ptr,
1214+
consumed: false,
1215+
})
1216+
}
1217+
_ => Err(PyRuntimeError::new_err(format!(
1218+
"Unsupported CUDA tensor shape: {}D. Expected 1D tensor for single \
1219+
sample encoding or 2D tensor (batch_size, features) for batch encoding.",
1220+
ndim
1221+
))),
1222+
}
1223+
}
1224+
}
11521225
}
11531226

11541227
// --- Loader bindings (Linux only; qdp-core pipeline types only built on Linux) ---

qdp/qdp-python/tests/test_dlpack_validation.py

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -32,23 +32,30 @@ def _engine():
3232

3333

3434
@pytest.mark.skipif(not _cuda_available(), reason="CUDA not available")
35-
def test_dtype_validation_float32_rejected():
36-
"""DLPack tensor must be float64; float32 CUDA tensor should fail with clear message."""
35+
def test_cuda_float32_amplitude_supported():
36+
"""1D float32 CUDA tensor should be supported for amplitude encoding via GPU pointer f32 path."""
3737
engine = _engine()
3838
# 1D float32 CUDA tensor (contiguous)
3939
t = torch.randn(4, dtype=torch.float32, device="cuda")
40-
with pytest.raises(RuntimeError) as exc_info:
40+
result = engine.encode(t, num_qubits=2, encoding_method="amplitude")
41+
assert result is not None
42+
43+
# Verify DLPack round-trip works and tensor is on CUDA
44+
qt = torch.from_dlpack(result)
45+
assert qt.is_cuda
46+
# With default engine precision=float32, complex64 is expected
47+
assert qt.dtype in (torch.complex64, torch.complex128)
48+
49+
50+
@pytest.mark.skipif(not _cuda_available(), reason="CUDA not available")
51+
def test_cuda_float32_amplitude_2d_unsupported():
52+
"""2D float32 CUDA tensor with amplitude encoding should raise a clear error."""
53+
engine = _engine()
54+
t = torch.randn(2, 4, dtype=torch.float32, device="cuda")
55+
with pytest.raises(
56+
RuntimeError, match="float32 batch amplitude encoding is not yet supported"
57+
):
4158
engine.encode(t, num_qubits=2, encoding_method="amplitude")
42-
msg = str(exc_info.value).lower()
43-
assert "float64" in msg
44-
# Accept either DLPack-style (code=/bits=/lanes=) or user-facing (float32/dtype) message
45-
assert (
46-
"code=" in msg
47-
or "bits=" in msg
48-
or "lanes=" in msg
49-
or "float32" in msg
50-
or "dtype" in msg
51-
)
5259

5360

5461
@pytest.mark.skipif(not _cuda_available(), reason="CUDA not available")

testing/qdp/test_bindings.py

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ def test_encode_cuda_tensor(data_shape, expected_shape, expected_batch_size):
315315
@requires_qdp
316316
@pytest.mark.gpu
317317
def test_encode_cuda_tensor_wrong_dtype():
318-
"""Test error when CUDA tensor has wrong dtype (non-float64)."""
318+
"""Test error when CUDA tensor has wrong dtype for amplitude (e.g. float16)."""
319319
pytest.importorskip("torch")
320320
from _qdp import QdpEngine
321321

@@ -324,9 +324,9 @@ def test_encode_cuda_tensor_wrong_dtype():
324324

325325
engine = QdpEngine(0)
326326

327-
# Create CUDA tensor with float32 dtype (wrong)
328-
data = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32, device="cuda:0")
329-
with pytest.raises(RuntimeError, match="CUDA tensor must have dtype float64"):
327+
# Amplitude encoding accepts float64 or float32 only; float16 is invalid
328+
data = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float16, device="cuda:0")
329+
with pytest.raises(RuntimeError, match="float64 or float32"):
330330
engine.encode(data, 2, "amplitude")
331331

332332

@@ -538,6 +538,31 @@ def test_encode_cuda_tensor_output_dtype(precision, expected_dtype):
538538
)
539539

540540

541+
@requires_qdp
542+
@pytest.mark.gpu
543+
@pytest.mark.parametrize(
544+
"precision,expected_dtype",
545+
[
546+
("float32", torch.complex64),
547+
("float64", torch.complex128),
548+
],
549+
)
550+
def test_encode_cuda_tensor_float32_input_output_dtype(precision, expected_dtype):
551+
"""Test that 1D float32 CUDA amplitude encoding respects engine precision (f32 path)."""
552+
pytest.importorskip("torch")
553+
from _qdp import QdpEngine
554+
555+
if not torch.cuda.is_available():
556+
pytest.skip("GPU required for QdpEngine")
557+
558+
engine = QdpEngine(0, precision=precision)
559+
data = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32, device="cuda:0")
560+
result = torch.from_dlpack(engine.encode(data, 2, "amplitude"))
561+
assert result.dtype == expected_dtype, (
562+
f"Expected {expected_dtype}, got {result.dtype}"
563+
)
564+
565+
541566
@requires_qdp
542567
@pytest.mark.gpu
543568
def test_basis_encode_basic():

0 commit comments

Comments (0)