(mahout) branch main updated: [QDP] Optimize run_throughput_pipeline to avoid per-iteration Vec allocations (#1136)

richhuang Tue, 10 Mar 2026 00:29:33 -0700

This is an automated email from the ASF dual-hosted git repository.

richhuang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git



The following commit(s) were added to refs/heads/main by this push:
     new 6092a6c96 [QDP] Optimize run_throughput_pipeline to avoid per-iteration Vec allocations (#1136)
6092a6c96 is described below

commit 6092a6c96cb3071f473e1d49ac8301f13c6c420b
Author: ChenChen Lai <[email protected]>
AuthorDate: Tue Mar 10 15:28:55 2026 +0800

    [QDP] Optimize run_throughput_pipeline to avoid per-iteration Vec allocations (#1136)
    
    * Optimize run_throughput_pipeline to avoid per-iteration Vec allocations
    
    * fix: address clippy warning in test code
    
    * add edge case
    
    * add test
    
    ---------
    
    Co-authored-by: user <0lai0>
---
 qdp/qdp-core/src/pipeline_runner.rs | 242 ++++++++++++++++++++++++++++++++++--
 1 file changed, 233 insertions(+), 9 deletions(-)

diff --git a/qdp/qdp-core/src/pipeline_runner.rs b/qdp/qdp-core/src/pipeline_runner.rs
index 9a41ee4bc..df1f61a23 100644
--- a/qdp/qdp-core/src/pipeline_runner.rs
+++ b/qdp/qdp-core/src/pipeline_runner.rs
@@ -455,14 +455,16 @@ pub fn vector_len(num_qubits: u32, encoding_method: &str) -> usize {
 }
 
 /// Deterministic sample generation matching Python utils.build_sample (amplitude/angle/basis).
-fn fill_sample(seed: u64, out: &mut [f64], encoding_method: &str) -> Result<()> {
+fn fill_sample(seed: u64, out: &mut [f64], encoding_method: &str, num_qubits: usize) -> Result<()> {
     let len = out.len();
     if len == 0 {
         return Ok(());
     }
     match encoding_method.to_lowercase().as_str() {
         "basis" => {
-            let mask = len.saturating_sub(1) as u64;
+            // For basis encoding, use 2^num_qubits as the state space size for mask calculation
+            let state_space_size = 1 << num_qubits;
+            let mask = (state_space_size - 1) as u64;
             let idx = seed & mask;
             out[0] = idx as f64;
         }
@@ -488,20 +490,32 @@ fn fill_sample(seed: u64, out: &mut [f64], encoding_method: &str) -> Result<()>
 
 /// Generate one batch (batch_size * vector_len elements, or batch_size * 1 for basis).
 fn generate_batch(config: &PipelineConfig, batch_idx: usize, vector_len: usize) -> Vec<f64> {
+    let mut batch = vec![0.0f64; config.batch_size * vector_len];
+    fill_batch_inplace(config, batch_idx, vector_len, &mut batch);
+    batch
+}
+
+/// Fill an existing batch buffer in-place (avoids per-iteration allocations in benchmarks).
+fn fill_batch_inplace(
+    config: &PipelineConfig,
+    batch_idx: usize,
+    vector_len: usize,
+    batch_buf: &mut [f64],
+) {
+    debug_assert_eq!(batch_buf.len(), config.batch_size * vector_len);
     let seed_base = config
         .seed
         .unwrap_or(0)
         .wrapping_add((batch_idx * config.batch_size) as u64);
-    let mut batch = vec![0.0f64; config.batch_size * vector_len];
     for i in 0..config.batch_size {
         let offset = i * vector_len;
         let _ = fill_sample(
             seed_base + i as u64,
-            &mut batch[offset..offset + vector_len],
+            &mut batch_buf[offset..offset + vector_len],
             &config.encoding_method,
+            config.num_qubits as usize,
         );
     }
-    batch
 }
 
 /// Release DLPack tensor (call deleter so GPU memory is freed).
@@ -521,11 +535,14 @@ pub fn run_throughput_pipeline(config: &PipelineConfig) -> Result<PipelineRunRes
     let vector_len = vector_len(config.num_qubits, &config.encoding_method);
     let num_qubits = config.num_qubits as usize;
 
+    // Reuse a single CPU batch buffer to avoid per-iteration allocations in throughput benchmarks.
+    let mut batch_buf = vec![0.0f64; config.batch_size * vector_len];
+
     // Warmup
     for b in 0..config.warmup_batches {
-        let batch = generate_batch(config, b, vector_len);
+        fill_batch_inplace(config, b, vector_len, &mut batch_buf);
         let ptr = engine.encode_batch(
-            &batch,
+            &batch_buf,
             config.batch_size,
             vector_len,
             num_qubits,
@@ -539,9 +556,9 @@ pub fn run_throughput_pipeline(config: &PipelineConfig) -> Result<PipelineRunRes
 
     let start = Instant::now();
     for b in 0..config.total_batches {
-        let batch = generate_batch(config, b, vector_len);
+        fill_batch_inplace(config, b, vector_len, &mut batch_buf);
         let ptr = engine.encode_batch(
-            &batch,
+            &batch_buf,
             config.batch_size,
             vector_len,
             num_qubits,
@@ -569,3 +586,210 @@ pub fn run_throughput_pipeline(config: &PipelineConfig) -> Result<PipelineRunRes
 pub fn run_latency_pipeline(config: &PipelineConfig) -> Result<PipelineRunResult> {
     run_throughput_pipeline(config)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn assert_generate_and_inplace_match(encoding_method: &str) {
+        let config = PipelineConfig {
+            num_qubits: 5,
+            batch_size: 8,
+            encoding_method: encoding_method.to_string(),
+            seed: Some(123),
+            ..Default::default()
+        };
+
+        let vector_len = vector_len(config.num_qubits, &config.encoding_method);
+
+        // Test edge cases: 0 and batch_size-1
+        for batch_idx in [0, config.batch_size - 1, 7] {
+            let generated = generate_batch(&config, batch_idx, vector_len);
+            let mut buf = vec![0.0f64; config.batch_size * vector_len];
+            fill_batch_inplace(&config, batch_idx, vector_len, &mut buf);
+
+            assert_eq!(generated, buf);
+        }
+    }
+
+    fn assert_adjacent_batches_differ(encoding_method: &str) {
+        let config = PipelineConfig {
+            num_qubits: 5,
+            batch_size: 8,
+            encoding_method: encoding_method.to_string(),
+            seed: Some(123),
+            ..Default::default()
+        };
+
+        let vector_len = vector_len(config.num_qubits, &config.encoding_method);
+
+        let batch0 = generate_batch(&config, 0, vector_len);
+        let batch1 = generate_batch(&config, 1, vector_len);
+        assert_ne!(batch0, batch1);
+    }
+
+    #[test]
+    fn generate_batch_matches_fill_batch_inplace_amplitude() {
+        assert_generate_and_inplace_match("amplitude");
+    }
+
+    #[test]
+    fn generate_batch_matches_fill_batch_inplace_angle() {
+        assert_generate_and_inplace_match("angle");
+    }
+
+    #[test]
+    fn generate_batch_matches_fill_batch_inplace_basis() {
+        assert_generate_and_inplace_match("basis");
+    }
+
+    #[test]
+    fn adjacent_batches_differ_amplitude() {
+        assert_adjacent_batches_differ("amplitude");
+    }
+
+    #[test]
+    fn adjacent_batches_differ_angle() {
+        assert_adjacent_batches_differ("angle");
+    }
+
+    #[test]
+    fn adjacent_batches_differ_basis() {
+        assert_adjacent_batches_differ("basis");
+    }
+
+    #[test]
+    fn test_seed_none() {
+        let config = PipelineConfig {
+            num_qubits: 5,
+            batch_size: 8,
+            encoding_method: "amplitude".to_string(),
+            seed: None,
+            ..Default::default()
+        };
+
+        let vector_len = vector_len(config.num_qubits, &config.encoding_method);
+        let batch = generate_batch(&config, 0, vector_len);
+        assert_eq!(batch.len(), config.batch_size * vector_len);
+
+        let mut buf = vec![0.0f64; config.batch_size * vector_len];
+        fill_batch_inplace(&config, 0, vector_len, &mut buf);
+        assert_eq!(batch, buf);
+    }
+
+    #[test]
+    fn test_batch_size_one() {
+        let config = PipelineConfig {
+            num_qubits: 5,
+            batch_size: 1,
+            encoding_method: "amplitude".to_string(),
+            seed: Some(123),
+            ..Default::default()
+        };
+
+        let vector_len = vector_len(config.num_qubits, &config.encoding_method);
+        let batch = generate_batch(&config, 0, vector_len);
+        assert_eq!(batch.len(), vector_len);
+
+        let mut buf = vec![0.0f64; vector_len];
+        fill_batch_inplace(&config, 0, vector_len, &mut buf);
+        assert_eq!(batch, buf);
+
+        let batch0 = generate_batch(&config, 0, vector_len);
+        let batch1 = generate_batch(&config, 1, vector_len);
+        assert_ne!(batch0, batch1);
+    }
+
+    #[test]
+    fn test_amplitude_encoding_case_insensitive() {
+        let config_lower = PipelineConfig {
+            num_qubits: 5,
+            batch_size: 8,
+            encoding_method: "amplitude".to_string(),
+            seed: Some(123),
+            ..Default::default()
+        };
+
+        let config_upper = PipelineConfig {
+            num_qubits: 5,
+            batch_size: 8,
+            encoding_method: "AMPLITUDE".to_string(),
+            seed: Some(123),
+            ..Default::default()
+        };
+
+        let vector_len = vector_len(config_lower.num_qubits, &config_lower.encoding_method);
+        let batch_lower = generate_batch(&config_lower, 0, vector_len);
+        let batch_upper = generate_batch(&config_upper, 0, vector_len);
+        assert_eq!(batch_lower, batch_upper);
+    }
+
+    #[test]
+    fn test_amplitude_samples_in_range() {
+        let config = PipelineConfig {
+            num_qubits: 5,
+            batch_size: 8,
+            encoding_method: "amplitude".to_string(),
+            seed: Some(123),
+            ..Default::default()
+        };
+
+        let vector_len = vector_len(config.num_qubits, &config.encoding_method);
+
+        for batch_idx in 0..5 {
+            let batch = generate_batch(&config, batch_idx, vector_len);
+            for &value in &batch {
+                assert!(
+                    (0.0..1.0).contains(&value),
+                    "amplitude value should be in [0, 1), got {} at batch_idx={}",
+                    value,
+                    batch_idx
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn test_amplitude_samples_in_range_with_seed_none() {
+        let config = PipelineConfig {
+            num_qubits: 5,
+            batch_size: 8,
+            encoding_method: "amplitude".to_string(),
+            seed: None,
+            ..Default::default()
+        };
+
+        let vector_len = vector_len(config.num_qubits, &config.encoding_method);
+        let batch = generate_batch(&config, 0, vector_len);
+
+        for &value in &batch {
+            assert!(
+                (0.0..1.0).contains(&value),
+                "amplitude value should be in [0, 1) with seed=None, got {}",
+                value
+            );
+        }
+    }
+
+    #[test]
+    fn test_amplitude_samples_in_range_batch_size_one() {
+        let config = PipelineConfig {
+            num_qubits: 5,
+            batch_size: 1,
+            encoding_method: "amplitude".to_string(),
+            seed: Some(123),
+            ..Default::default()
+        };
+
+        let vector_len = vector_len(config.num_qubits, &config.encoding_method);
+        let batch = generate_batch(&config, 0, vector_len);
+
+        for &value in &batch {
+            assert!(
+                (0.0..1.0).contains(&value),
+                "amplitude value should be in [0, 1) with batch_size=1, got {}",
+                value
+            );
+        }
+    }
+}

(mahout) branch main updated: [QDP] Optimize run_throughput_pipeline to avoid per-iteration Vec allocations (#1136)

Reply via email to