@@ -26,7 +26,7 @@ use crate::QdpEngine;
2626use crate :: dlpack:: DLManagedTensor ;
2727use crate :: error:: { MahoutError , Result } ;
2828use crate :: io;
29- use crate :: reader:: StreamingDataReader ;
29+ use crate :: reader:: { NullHandling , StreamingDataReader } ;
3030use crate :: readers:: ParquetStreamingReader ;
3131
3232/// Configuration for throughput/latency pipeline runs (Python run_throughput_pipeline_py).
@@ -39,6 +39,7 @@ pub struct PipelineConfig {
3939 pub encoding_method : String ,
4040 pub seed : Option < u64 > ,
4141 pub warmup_batches : usize ,
42+ pub null_handling : NullHandling ,
4243}
4344
4445impl Default for PipelineConfig {
@@ -51,6 +52,7 @@ impl Default for PipelineConfig {
5152 encoding_method : "amplitude" . to_string ( ) ,
5253 seed : None ,
5354 warmup_batches : 0 ,
55+ null_handling : NullHandling :: FillZero ,
5456 }
5557 }
5658}
@@ -154,12 +156,23 @@ fn path_extension_lower(path: &Path) -> Option<String> {
154156
155157/// Dispatches by path extension to the appropriate io reader. Returns (data, num_samples, sample_size).
156158/// Unsupported or missing extension returns Err with message listing supported formats.
157- fn read_file_by_extension ( path : & Path ) -> Result < ( Vec < f64 > , usize , usize ) > {
159+ fn read_file_by_extension (
160+ path : & Path ,
161+ null_handling : NullHandling ,
162+ ) -> Result < ( Vec < f64 > , usize , usize ) > {
158163 let ext_lower = path_extension_lower ( path) ;
159164 let ext = ext_lower. as_deref ( ) ;
160165 match ext {
161- Some ( "parquet" ) => io:: read_parquet_batch ( path) ,
162- Some ( "arrow" ) | Some ( "feather" ) | Some ( "ipc" ) => io:: read_arrow_ipc_batch ( path) ,
166+ Some ( "parquet" ) => {
167+ use crate :: reader:: DataReader ;
168+ let mut reader = crate :: readers:: ParquetReader :: new ( path, None , null_handling) ?;
169+ reader. read_batch ( )
170+ }
171+ Some ( "arrow" ) | Some ( "feather" ) | Some ( "ipc" ) => {
172+ use crate :: reader:: DataReader ;
173+ let mut reader = crate :: readers:: ArrowIPCReader :: new ( path, null_handling) ?;
174+ reader. read_batch ( )
175+ }
163176 Some ( "npy" ) => io:: read_numpy_batch ( path) ,
164177 Some ( "pt" ) | Some ( "pth" ) => io:: read_torch_batch ( path) ,
165178 Some ( "pb" ) => io:: read_tensorflow_batch ( path) ,
@@ -211,7 +224,7 @@ impl PipelineIterator {
211224 batch_limit : usize ,
212225 ) -> Result < Self > {
213226 let path = path. as_ref ( ) ;
214- let ( data, num_samples, sample_size) = read_file_by_extension ( path) ?;
227+ let ( data, num_samples, sample_size) = read_file_by_extension ( path, config . null_handling ) ?;
215228 let vector_len = vector_len ( config. num_qubits , & config. encoding_method ) ;
216229
217230 // Dimension validation at construction.
@@ -263,7 +276,11 @@ impl PipelineIterator {
263276 ) ) ) ;
264277 }
265278
266- let mut reader = ParquetStreamingReader :: new ( path, Some ( DEFAULT_PARQUET_ROW_GROUP_SIZE ) ) ?;
279+ let mut reader = ParquetStreamingReader :: new (
280+ path,
281+ Some ( DEFAULT_PARQUET_ROW_GROUP_SIZE ) ,
282+ config. null_handling ,
283+ ) ?;
267284 let vector_len = vector_len ( config. num_qubits , & config. encoding_method ) ;
268285
269286 // Read first chunk to learn sample_size; reuse as initial buffer.
0 commit comments