Skip to content

Commit 03fac63

Browse files
committed
[QDP] feat: add credit card fraud benchmark + amplitude encoding optimizations
1 parent b68cdfd commit 03fac63

File tree

10 files changed

+1804
-40
lines changed

10 files changed

+1804
-40
lines changed

qdp/qdp-core/src/gpu/encodings/amplitude.rs

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -245,21 +245,9 @@ impl QuantumEncoder for AmplitudeEncoder {
245245
buffer
246246
};
247247

248-
// Validate norms on host to catch zero or NaN samples early
249-
{
250-
crate::profile_scope!("GPU::NormValidation");
251-
let host_inv_norms = device
252-
.dtoh_sync_copy(&inv_norms_gpu)
253-
.map_err(|e| MahoutError::Cuda(format!("Failed to copy norms to host: {:?}", e)))?;
254-
255-
if host_inv_norms.iter().any(|v| !v.is_finite() || *v == 0.0) {
256-
return Err(MahoutError::InvalidInput(
257-
"One or more samples have zero or invalid norm".to_string(),
258-
));
259-
}
260-
}
261-
262-
// Launch batch kernel
248+
// Launch batch encode kernel — takes GPU norm buffer directly, no D2H needed yet.
249+
// We defer the norm validation D2H copy until AFTER the encode kernel + sync so that
250+
// the norm kernel → encode kernel sequence runs without an intermediate GPU-CPU roundtrip.
263251
{
264252
crate::profile_scope!("GPU::BatchKernelLaunch");
265253
let state_ptr = batch_state_vector.ptr_f64().ok_or_else(|| {
@@ -288,14 +276,30 @@ impl QuantumEncoder for AmplitudeEncoder {
288276
}
289277
}
290278

291-
// Synchronize
279+
// Synchronize — all GPU work (norm + encode) complete after this point.
292280
{
293281
crate::profile_scope!("GPU::Synchronize");
294282
device
295283
.synchronize()
296284
.map_err(|e| MahoutError::Cuda(format!("Sync failed: {:?}", e)))?;
297285
}
298286

287+
// Validate norms on host AFTER sync: D2H copy no longer blocks the encode kernel.
288+
// This preserves error detection for zero/NaN samples without adding a mid-pipeline
289+
// GPU-CPU roundtrip between the norm and encode kernels.
290+
{
291+
crate::profile_scope!("GPU::NormValidation");
292+
let host_inv_norms = device
293+
.dtoh_sync_copy(&inv_norms_gpu)
294+
.map_err(|e| MahoutError::Cuda(format!("Failed to copy norms to host: {:?}", e)))?;
295+
296+
if host_inv_norms.iter().any(|v| !v.is_finite() || *v == 0.0) {
297+
return Err(MahoutError::InvalidInput(
298+
"One or more samples have zero or invalid norm".to_string(),
299+
));
300+
}
301+
}
302+
299303
Ok(batch_state_vector)
300304
}
301305

@@ -412,17 +416,8 @@ impl QuantumEncoder for AmplitudeEncoder {
412416
}
413417
buffer
414418
};
415-
{
416-
crate::profile_scope!("GPU::NormValidation");
417-
let host_inv_norms = device
418-
.dtoh_sync_copy(&inv_norms_gpu)
419-
.map_err(|e| MahoutError::Cuda(format!("Failed to copy norms to host: {:?}", e)))?;
420-
if host_inv_norms.iter().any(|v| !v.is_finite() || *v == 0.0) {
421-
return Err(MahoutError::InvalidInput(
422-
"One or more samples have zero or invalid norm".to_string(),
423-
));
424-
}
425-
}
419+
// Launch encode kernel before D2H norm validation: GPU norm buffer is passed directly,
420+
// so the encode kernel can run immediately after the norm kernel without a CPU roundtrip.
426421
{
427422
crate::profile_scope!("GPU::BatchKernelLaunch");
428423
use cudarc::driver::DevicePtr;
@@ -450,10 +445,22 @@ impl QuantumEncoder for AmplitudeEncoder {
450445
)));
451446
}
452447
}
448+
// Synchronize first; then validate norms on host (D2H after all GPU work is done).
453449
{
454450
crate::profile_scope!("GPU::Synchronize");
455451
sync_cuda_stream(stream, "CUDA stream synchronize failed")?;
456452
}
453+
{
454+
crate::profile_scope!("GPU::NormValidation");
455+
let host_inv_norms = device
456+
.dtoh_sync_copy(&inv_norms_gpu)
457+
.map_err(|e| MahoutError::Cuda(format!("Failed to copy norms to host: {:?}", e)))?;
458+
if host_inv_norms.iter().any(|v| !v.is_finite() || *v == 0.0) {
459+
return Err(MahoutError::InvalidInput(
460+
"One or more samples have zero or invalid norm".to_string(),
461+
));
462+
}
463+
}
457464
Ok(batch_state_vector)
458465
}
459466

qdp/qdp-core/src/pipeline_runner.rs

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,47 @@ impl PipelineIterator {
259259
})
260260
}
261261

262+
/// Create a pipeline iterator from an in-memory array (e.g. from Python numpy).
263+
/// Data is owned by the iterator; the full encode loop runs in Rust (take_batch + encode_batch).
264+
pub fn new_from_array(
265+
engine: QdpEngine,
266+
data: Vec<f64>,
267+
num_samples: usize,
268+
sample_size: usize,
269+
config: PipelineConfig,
270+
batch_limit: usize,
271+
) -> Result<Self> {
272+
let vector_len = vector_len(config.num_qubits, &config.encoding_method);
273+
if sample_size != vector_len {
274+
return Err(MahoutError::InvalidInput(format!(
275+
"Array sample_size {} does not match vector_len {} for num_qubits={}, encoding={}",
276+
sample_size, vector_len, config.num_qubits, config.encoding_method
277+
)));
278+
}
279+
if data.len() != num_samples * sample_size {
280+
return Err(MahoutError::InvalidInput(format!(
281+
"Array length {} is not num_samples ({}) * sample_size ({})",
282+
data.len(),
283+
num_samples,
284+
sample_size
285+
)));
286+
}
287+
let source = DataSource::InMemory {
288+
data,
289+
cursor: 0,
290+
num_samples,
291+
sample_size,
292+
batches_yielded: 0,
293+
batch_limit,
294+
};
295+
Ok(Self {
296+
engine,
297+
config,
298+
source,
299+
vector_len,
300+
})
301+
}
302+
262303
/// Create a pipeline iterator from a Parquet file using streaming read (Phase 2b).
263304
/// Only `.parquet` is supported; reduces memory for large files by reading in chunks.
264305
/// Validates sample_size == vector_len after the first chunk.
@@ -428,7 +469,61 @@ impl PipelineIterator {
428469
}
429470

430471
/// Returns the next batch as a DLPack pointer; `Ok(None)` when exhausted.
472+
/// For InMemory source, passes a slice reference to encode_batch (no per-batch copy).
431473
pub fn next_batch(&mut self) -> Result<Option<*mut DLManagedTensor>> {
474+
// InMemory: update cursor, then encode from &data[start..end] to avoid to_vec().
475+
let in_memory_range: Option<(usize, usize, usize, usize)> = match &mut self.source {
476+
DataSource::InMemory {
477+
data,
478+
cursor,
479+
sample_size,
480+
batches_yielded,
481+
batch_limit,
482+
..
483+
} => {
484+
if *batches_yielded >= *batch_limit {
485+
None
486+
} else {
487+
let remaining = (data.len() - *cursor) / *sample_size;
488+
if remaining == 0 {
489+
None
490+
} else {
491+
let batch_n = remaining.min(self.config.batch_size);
492+
let start = *cursor;
493+
let end = start + batch_n * *sample_size;
494+
*cursor = end;
495+
*batches_yielded += 1;
496+
Some((
497+
start,
498+
batch_n,
499+
*sample_size,
500+
self.config.num_qubits as usize,
501+
))
502+
}
503+
}
504+
}
505+
_ => None,
506+
};
507+
508+
if let Some((start, batch_n, sample_size, num_qubits)) = in_memory_range {
509+
let slice = match &self.source {
510+
DataSource::InMemory { data, .. } => {
511+
let len = batch_n * sample_size;
512+
&data[start..start + len]
513+
}
514+
_ => unreachable!(),
515+
};
516+
let ptr = self.engine.encode_batch(
517+
slice,
518+
batch_n,
519+
sample_size,
520+
num_qubits,
521+
&self.config.encoding_method,
522+
)?;
523+
return Ok(Some(ptr));
524+
}
525+
526+
// Synthetic / Streaming: take_batch_from_source (may copy) then encode.
432527
let Some((batch_data, batch_n, sample_size, num_qubits)) = self.take_batch_from_source()?
433528
else {
434529
return Ok(None);

qdp/qdp-python/benchmark/encoding_benchmarks/README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,25 @@ To see the full list of options and defaults, append `--help`:
7575
uv run python benchmark/encoding_benchmarks/pennylane_baseline/iris_amplitude.py --help
7676
uv run python benchmark/encoding_benchmarks/qdp_pipeline/iris_amplitude.py --help
7777
```
78+
79+
## Credit Card Fraud amplitude baseline (PennyLane)
80+
81+
Minimal, reproducible steps (run from `qdp/qdp-python`):
82+
83+
1. **Download dataset (once)** — Kaggle `creditcard.csv` mirror:
84+
85+
```bash
86+
mkdir -p benchmark/encoding_benchmarks/pennylane_baseline/data
87+
curl -L -o benchmark/encoding_benchmarks/pennylane_baseline/data/creditcard.csv \
88+
https://raw.githubusercontent.com/nsethi31/Kaggle-Data-Credit-Card-Fraud-Detection/master/creditcard.csv
89+
```
90+
91+
2. **Run the PennyLane baseline** — StandardScaler → PCA(16) → L2 norm → 4‑qubit amplitude VQC:
92+
93+
```bash
94+
uv run python benchmark/encoding_benchmarks/pennylane_baseline/creditcardfraud_amplitude.py \
95+
--data-file benchmark/encoding_benchmarks/pennylane_baseline/data/creditcard.csv \
96+
--max-samples 300000 --iters 200 --batch-size 512 --trials 1
97+
```
98+
99+
This prints compile time, train time / throughput, and task metrics (AUPRC, F1, precision, recall) on the test set.

0 commit comments

Comments (0)