1818// Python bindings release GIL during the run.
1919
2020use std:: f64:: consts:: PI ;
21+ use std:: path:: Path ;
22+ use std:: sync:: Mutex ;
2123use std:: time:: Instant ;
2224
2325use crate :: QdpEngine ;
2426use crate :: dlpack:: DLManagedTensor ;
25- use crate :: error:: Result ;
27+ use crate :: error:: { MahoutError , Result } ;
28+ use crate :: io;
29+ use crate :: reader:: StreamingDataReader ;
30+ use crate :: readers:: ParquetStreamingReader ;
2631
2732/// Configuration for throughput/latency pipeline runs (Python run_throughput_pipeline_py).
2833#[ derive( Clone , Debug ) ]
@@ -58,14 +63,111 @@ pub struct PipelineRunResult {
5863 pub latency_ms_per_vector : f64 ,
5964}
6065
/// Data source for the pipeline iterator (Phase 1: Synthetic; Phase 2a: InMemory; Phase 2b: Streaming).
///
/// `Debug` is implemented manually (below) rather than derived, since the
/// `Streaming` variant holds a reader and bulk buffers that should not be
/// dumped field-by-field.
pub enum DataSource {
    /// Phase 1: deterministic synthetic batches generated per index.
    Synthetic {
        seed: u64,
        /// Next batch index to generate; incremented on each pull.
        batch_index: usize,
        /// Iteration stops once `batch_index` reaches this count.
        total_batches: usize,
    },
    /// Phase 2a: full file loaded once; iterator slices by batch_size.
    InMemory {
        /// Flattened samples: `num_samples * sample_size` f64 values.
        data: Vec<f64>,
        /// Read position into `data`, in f64 elements.
        cursor: usize,
        num_samples: usize,
        sample_size: usize,
        /// Batches handed out so far; compared against `batch_limit`.
        batches_yielded: usize,
        /// Cap on batches yielded; `usize::MAX` means iterate until EOF.
        batch_limit: usize,
    },
    /// Phase 2b: stream from Parquet in chunks; iterator refills buffer and encodes by batch.
    /// Reader is in Mutex so PipelineIterator remains Sync (required by PyO3 pyclass).
    Streaming {
        reader: Mutex<ParquetStreamingReader>,
        /// Decoded-but-unconsumed values; refilled from `reader` on demand.
        buffer: Vec<f64>,
        /// Read position into `buffer`, in f64 elements.
        buffer_cursor: usize,
        /// Reusable scratch passed to `read_chunk` to avoid per-refill allocation.
        read_chunk_scratch: Vec<f64>,
        sample_size: usize,
        /// Cap on batches yielded; `usize::MAX` means iterate until EOF.
        batch_limit: usize,
        batches_yielded: usize,
    },
}
94+
95+ impl std:: fmt:: Debug for DataSource {
96+ fn fmt ( & self , f : & mut std:: fmt:: Formatter < ' _ > ) -> std:: fmt:: Result {
97+ match self {
98+ DataSource :: Synthetic {
99+ seed,
100+ batch_index,
101+ total_batches,
102+ } => f
103+ . debug_struct ( "Synthetic" )
104+ . field ( "seed" , seed)
105+ . field ( "batch_index" , batch_index)
106+ . field ( "total_batches" , total_batches)
107+ . finish ( ) ,
108+ DataSource :: InMemory {
109+ cursor,
110+ num_samples,
111+ sample_size,
112+ batches_yielded,
113+ batch_limit,
114+ ..
115+ } => f
116+ . debug_struct ( "InMemory" )
117+ . field ( "cursor" , cursor)
118+ . field ( "num_samples" , num_samples)
119+ . field ( "sample_size" , sample_size)
120+ . field ( "batches_yielded" , batches_yielded)
121+ . field ( "batch_limit" , batch_limit)
122+ . finish ( ) ,
123+ DataSource :: Streaming {
124+ buffer,
125+ buffer_cursor,
126+ sample_size,
127+ batch_limit,
128+ batches_yielded,
129+ ..
130+ } => f
131+ . debug_struct ( "Streaming" )
132+ . field ( "buffer_len" , & buffer. len ( ) )
133+ . field ( "buffer_cursor" , buffer_cursor)
134+ . field ( "sample_size" , sample_size)
135+ . field ( "batch_limit" , batch_limit)
136+ . field ( "batches_yielded" , batches_yielded)
137+ . finish ( ) ,
138+ }
139+ }
140+ }
141+
/// Default Parquet row group size for streaming reader (tunable).
const DEFAULT_PARQUET_ROW_GROUP_SIZE: usize = 2048;

/// When buffer_cursor >= buffer.len() / BUFFER_COMPACT_DENOM, compact by draining consumed prefix.
/// A denominator of 2 means: compact once at least half the buffer has been consumed.
const BUFFER_COMPACT_DENOM: usize = 2;
147+
/// Returns the path extension as lowercase ASCII (e.g. "parquet"), or None if missing/non-UTF8.
///
/// Uses `to_ascii_lowercase` rather than `to_lowercase`: the documented
/// contract is ASCII, the result is only ever compared against ASCII extension
/// literals, and full Unicode case mapping can change string length for
/// non-ASCII input (e.g. 'İ'), which would be surprising here.
fn path_extension_lower(path: &Path) -> Option<String> {
    path.extension()
        .and_then(|e| e.to_str())
        .map(|s| s.to_ascii_lowercase())
}
154+
155+ /// Dispatches by path extension to the appropriate io reader. Returns (data, num_samples, sample_size).
156+ /// Unsupported or missing extension returns Err with message listing supported formats.
157+ fn read_file_by_extension ( path : & Path ) -> Result < ( Vec < f64 > , usize , usize ) > {
158+ let ext_lower = path_extension_lower ( path) ;
159+ let ext = ext_lower. as_deref ( ) ;
160+ match ext {
161+ Some ( "parquet" ) => io:: read_parquet_batch ( path) ,
162+ Some ( "arrow" ) | Some ( "feather" ) | Some ( "ipc" ) => io:: read_arrow_ipc_batch ( path) ,
163+ Some ( "npy" ) => io:: read_numpy_batch ( path) ,
164+ Some ( "pt" ) | Some ( "pth" ) => io:: read_torch_batch ( path) ,
165+ Some ( "pb" ) => io:: read_tensorflow_batch ( path) ,
166+ _ => Err ( MahoutError :: InvalidInput ( format ! (
167+ "Unsupported file extension {:?}. Supported: .parquet, .arrow, .feather, .ipc, .npy, .pt, .pth, .pb" ,
168+ path. extension( )
169+ ) ) ) ,
170+ }
69171}
70172
71173/// Stateful iterator that yields one batch DLPack at a time for Python `for` loop consumption.
@@ -77,6 +179,9 @@ pub struct PipelineIterator {
77179 vector_len : usize ,
78180}
79181
/// (batch_data, batch_n, sample_size, num_qubits) from one source pull.
/// Invariant: `batch_data.len() == batch_n * sample_size`. The final batch from
/// a file source may be short (`batch_n < config.batch_size`) when fewer
/// samples remain.
type BatchFromSource = (Vec<f64>, usize, usize, usize);
184+
80185impl PipelineIterator {
81186 /// Create a new synthetic-data pipeline iterator.
82187 pub fn new_synthetic ( engine : QdpEngine , config : PipelineConfig ) -> Result < Self > {
@@ -94,26 +199,227 @@ impl PipelineIterator {
94199 } )
95200 }
96201
97- /// Returns the next batch as a DLPack pointer; `Ok(None)` when exhausted.
98- pub fn next_batch ( & mut self ) -> Result < Option < * mut DLManagedTensor > > {
99- let ( batch_data, num_qubits) = match & mut self . source {
202+ /// Create a pipeline iterator from a file (Phase 2a: load full file then slice by batch).
203+ /// Dispatches by path extension; validates dimensions at construction.
204+ ///
205+ /// Supported extensions: .parquet, .arrow, .feather, .ipc, .npy, .pt, .pth, .pb.
206+ /// For file source, `batch_limit` caps batches yielded (e.g. for testing); use `usize::MAX` to iterate until EOF.
207+ pub fn new_from_file < P : AsRef < Path > > (
208+ engine : QdpEngine ,
209+ path : P ,
210+ config : PipelineConfig ,
211+ batch_limit : usize ,
212+ ) -> Result < Self > {
213+ let path = path. as_ref ( ) ;
214+ let ( data, num_samples, sample_size) = read_file_by_extension ( path) ?;
215+ let vector_len = vector_len ( config. num_qubits , & config. encoding_method ) ;
216+
217+ // Dimension validation at construction.
218+ if sample_size != vector_len {
219+ return Err ( MahoutError :: InvalidInput ( format ! (
220+ "File feature length {} does not match vector_len {} for num_qubits={}, encoding={}" ,
221+ sample_size, vector_len, config. num_qubits, config. encoding_method
222+ ) ) ) ;
223+ }
224+ if data. len ( ) != num_samples * sample_size {
225+ return Err ( MahoutError :: InvalidInput ( format ! (
226+ "File data length {} is not num_samples ({}) * sample_size ({})" ,
227+ data. len( ) ,
228+ num_samples,
229+ sample_size
230+ ) ) ) ;
231+ }
232+
233+ let source = DataSource :: InMemory {
234+ data,
235+ cursor : 0 ,
236+ num_samples,
237+ sample_size,
238+ batches_yielded : 0 ,
239+ batch_limit,
240+ } ;
241+ Ok ( Self {
242+ engine,
243+ config,
244+ source,
245+ vector_len,
246+ } )
247+ }
248+
    /// Create a pipeline iterator from a Parquet file using streaming read (Phase 2b).
    /// Only `.parquet` is supported; reduces memory for large files by reading in chunks.
    /// Validates sample_size == vector_len after the first chunk.
    pub fn new_from_file_streaming<P: AsRef<Path>>(
        engine: QdpEngine,
        path: P,
        config: PipelineConfig,
        batch_limit: usize,
    ) -> Result<Self> {
        let path = path.as_ref();
        // Extension gate: other formats must go through the in-memory loader.
        if path_extension_lower(path).as_deref() != Some("parquet") {
            return Err(MahoutError::InvalidInput(format!(
                "Streaming file loader supports only .parquet; got extension {:?}. Use .source_file(path) for other formats.",
                path.extension()
            )));
        }

        let mut reader = ParquetStreamingReader::new(path, Some(DEFAULT_PARQUET_ROW_GROUP_SIZE))?;
        let vector_len = vector_len(config.num_qubits, &config.encoding_method);

        // Read first chunk to learn sample_size; reuse as initial buffer.
        // NOTE(review): assumes read_chunk writes at most buffer.len() values and
        // returns the count written — confirm against the StreamingDataReader contract.
        const INITIAL_CHUNK_CAP: usize = 64 * 1024;
        let mut buffer = vec![0.0; INITIAL_CHUNK_CAP];
        let written = reader.read_chunk(&mut buffer)?;
        if written == 0 {
            return Err(MahoutError::InvalidInput(
                "Parquet file is empty or contains no data.".to_string(),
            ));
        }
        // sample_size is only known once the reader has decoded some data.
        let sample_size = reader.get_sample_size().ok_or_else(|| {
            MahoutError::InvalidInput(
                "Parquet streaming reader did not set sample_size after first chunk.".to_string(),
            )
        })?;

        if sample_size != vector_len {
            return Err(MahoutError::InvalidInput(format!(
                "File feature length {} does not match vector_len {} for num_qubits={}, encoding={}",
                sample_size, vector_len, config.num_qubits, config.encoding_method
            )));
        }

        // Drop the unwritten zero tail so buffer holds exactly the first chunk.
        buffer.truncate(written);
        // Scratch buffer the iterator reuses for every subsequent read_chunk call.
        let read_chunk_scratch = vec![0.0; INITIAL_CHUNK_CAP];

        let source = DataSource::Streaming {
            reader: Mutex::new(reader),
            buffer,
            buffer_cursor: 0,
            read_chunk_scratch,
            sample_size,
            batch_limit,
            batches_yielded: 0,
        };
        Ok(Self {
            engine,
            config,
            source,
            vector_len,
        })
    }
310+
    /// Yields the next batch data from the current source; `None` when exhausted.
    /// Returns (batch_data, batch_n, sample_size, num_qubits).
    fn take_batch_from_source(&mut self) -> Result<Option<BatchFromSource>> {
        Ok(match &mut self.source {
            DataSource::Synthetic {
                batch_index,
                total_batches,
                ..
            } => {
                if *batch_index >= *total_batches {
                    None
                } else {
                    let data = generate_batch(&self.config, *batch_index, self.vector_len);
                    *batch_index += 1;
                    // Synthetic batches are always full-sized (config.batch_size).
                    Some((
                        data,
                        self.config.batch_size,
                        self.vector_len,
                        self.config.num_qubits as usize,
                    ))
                }
            }
            DataSource::InMemory {
                data,
                cursor,
                sample_size,
                batches_yielded,
                batch_limit,
                ..
            } => {
                if *batches_yielded >= *batch_limit {
                    None
                } else {
                    // Whole samples still available beyond the cursor.
                    let remaining = (data.len() - *cursor) / *sample_size;
                    if remaining == 0 {
                        None
                    } else {
                        // Final batch may be short: batch_n <= config.batch_size.
                        let batch_n = remaining.min(self.config.batch_size);
                        let start = *cursor;
                        let end = start + batch_n * *sample_size;
                        *cursor = end;
                        *batches_yielded += 1;
                        let slice = data[start..end].to_vec();
                        Some((
                            slice,
                            batch_n,
                            *sample_size,
                            self.config.num_qubits as usize,
                        ))
                    }
                }
            }
            DataSource::Streaming {
                reader,
                buffer,
                buffer_cursor,
                read_chunk_scratch,
                sample_size,
                batch_limit,
                batches_yielded,
            } => {
                if *batches_yielded >= *batch_limit {
                    None
                } else {
                    // Refill until a full batch is buffered or the reader hits EOF.
                    let required = self.config.batch_size * *sample_size;
                    while (buffer.len() - *buffer_cursor) < required {
                        // get_mut needs no lock acquisition (&mut self proves
                        // exclusive access); Err only if the mutex was poisoned.
                        let r = reader.get_mut().map_err(|e| {
                            MahoutError::Io(format!("Streaming reader mutex poisoned: {}", e))
                        })?;
                        let written = r.read_chunk(read_chunk_scratch)?;
                        if written == 0 {
                            // EOF: fall through and yield whatever complete samples remain.
                            break;
                        }
                        buffer.extend_from_slice(&read_chunk_scratch[..written]);
                    }
                    let available = buffer.len() - *buffer_cursor;
                    let available_samples = available / *sample_size;
                    if available_samples == 0 {
                        None
                    } else {
                        // Partial batch at EOF is allowed: batch_n <= config.batch_size.
                        let batch_n = available_samples.min(self.config.batch_size);
                        let start = *buffer_cursor;
                        let end = start + batch_n * *sample_size;
                        *buffer_cursor = end;
                        *batches_yielded += 1;
                        let slice = buffer[start..end].to_vec();
                        // Amortized compaction: once at least half the buffer is
                        // consumed, drain the dead prefix so it cannot grow unboundedly.
                        if *buffer_cursor >= buffer.len() / BUFFER_COMPACT_DENOM {
                            buffer.drain(..*buffer_cursor);
                            *buffer_cursor = 0;
                        }
                        Some((
                            slice,
                            batch_n,
                            *sample_size,
                            self.config.num_qubits as usize,
                        ))
                    }
                }
            }
        })
    }
412+
413+ /// Returns the next batch as a DLPack pointer; `Ok(None)` when exhausted.
414+ pub fn next_batch ( & mut self ) -> Result < Option < * mut DLManagedTensor > > {
415+ let Some ( ( batch_data, batch_n, sample_size, num_qubits) ) = self . take_batch_from_source ( ) ?
416+ else {
417+ return Ok ( None ) ;
112418 } ;
113419 let ptr = self . engine . encode_batch (
114420 & batch_data,
115- self . config . batch_size ,
116- self . vector_len ,
421+ batch_n ,
422+ sample_size ,
117423 num_qubits,
118424 & self . config . encoding_method ,
119425 ) ?;
0 commit comments