This is an automated email from the ASF dual-hosted git repository.
richhuang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git
The following commit(s) were added to refs/heads/main by this push:
new 6092a6c96 [QDP] Optimize run_throughput_pipeline to avoid
per-iteration Vec allocations (#1136)
6092a6c96 is described below
commit 6092a6c96cb3071f473e1d49ac8301f13c6c420b
Author: ChenChen Lai <[email protected]>
AuthorDate: Tue Mar 10 15:28:55 2026 +0800
[QDP] Optimize run_throughput_pipeline to avoid per-iteration Vec
allocations (#1136)
* Optimize run_throughput_pipeline to avoid per-iteration Vec allocations
* fix: address clippy warning in test code
* add edge case
* add test
---------
Co-authored-by: user <0lai0>
---
qdp/qdp-core/src/pipeline_runner.rs | 242 ++++++++++++++++++++++++++++++++++--
1 file changed, 233 insertions(+), 9 deletions(-)
diff --git a/qdp/qdp-core/src/pipeline_runner.rs b/qdp/qdp-core/src/pipeline_runner.rs
index 9a41ee4bc..df1f61a23 100644
--- a/qdp/qdp-core/src/pipeline_runner.rs
+++ b/qdp/qdp-core/src/pipeline_runner.rs
@@ -455,14 +455,16 @@ pub fn vector_len(num_qubits: u32, encoding_method: &str) -> usize {
}
/// Deterministic sample generation matching Python utils.build_sample (amplitude/angle/basis).
-fn fill_sample(seed: u64, out: &mut [f64], encoding_method: &str) -> Result<()> {
+fn fill_sample(seed: u64, out: &mut [f64], encoding_method: &str, num_qubits: usize) -> Result<()> {
let len = out.len();
if len == 0 {
return Ok(());
}
match encoding_method.to_lowercase().as_str() {
"basis" => {
- let mask = len.saturating_sub(1) as u64;
+ // For basis encoding, use 2^num_qubits as the state space size for mask calculation
+ let state_space_size = 1 << num_qubits;
+ let mask = (state_space_size - 1) as u64;
let idx = seed & mask;
out[0] = idx as f64;
}
@@ -488,20 +490,32 @@ fn fill_sample(seed: u64, out: &mut [f64], encoding_method: &str) -> Result<()>
/// Generate one batch (batch_size * vector_len elements, or batch_size * 1 for basis).
fn generate_batch(config: &PipelineConfig, batch_idx: usize, vector_len: usize) -> Vec<f64> {
+ let mut batch = vec![0.0f64; config.batch_size * vector_len];
+ fill_batch_inplace(config, batch_idx, vector_len, &mut batch);
+ batch
+}
+
+/// Fill an existing batch buffer in-place (avoids per-iteration allocations in benchmarks).
+fn fill_batch_inplace(
+ config: &PipelineConfig,
+ batch_idx: usize,
+ vector_len: usize,
+ batch_buf: &mut [f64],
+) {
+ debug_assert_eq!(batch_buf.len(), config.batch_size * vector_len);
let seed_base = config
.seed
.unwrap_or(0)
.wrapping_add((batch_idx * config.batch_size) as u64);
- let mut batch = vec![0.0f64; config.batch_size * vector_len];
for i in 0..config.batch_size {
let offset = i * vector_len;
let _ = fill_sample(
seed_base + i as u64,
- &mut batch[offset..offset + vector_len],
+ &mut batch_buf[offset..offset + vector_len],
&config.encoding_method,
+ config.num_qubits as usize,
);
}
- batch
}
/// Release DLPack tensor (call deleter so GPU memory is freed).
@@ -521,11 +535,14 @@ pub fn run_throughput_pipeline(config: &PipelineConfig) -> Result<PipelineRunRes
let vector_len = vector_len(config.num_qubits, &config.encoding_method);
let num_qubits = config.num_qubits as usize;
+ // Reuse a single CPU batch buffer to avoid per-iteration allocations in throughput benchmarks.
+ let mut batch_buf = vec![0.0f64; config.batch_size * vector_len];
+
// Warmup
for b in 0..config.warmup_batches {
- let batch = generate_batch(config, b, vector_len);
+ fill_batch_inplace(config, b, vector_len, &mut batch_buf);
let ptr = engine.encode_batch(
- &batch,
+ &batch_buf,
config.batch_size,
vector_len,
num_qubits,
@@ -539,9 +556,9 @@ pub fn run_throughput_pipeline(config: &PipelineConfig) -> Result<PipelineRunRes
let start = Instant::now();
for b in 0..config.total_batches {
- let batch = generate_batch(config, b, vector_len);
+ fill_batch_inplace(config, b, vector_len, &mut batch_buf);
let ptr = engine.encode_batch(
- &batch,
+ &batch_buf,
config.batch_size,
vector_len,
num_qubits,
@@ -569,3 +586,210 @@ pub fn run_throughput_pipeline(config: &PipelineConfig) -> Result<PipelineRunRes
pub fn run_latency_pipeline(config: &PipelineConfig) -> Result<PipelineRunResult> {
run_throughput_pipeline(config)
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn assert_generate_and_inplace_match(encoding_method: &str) {
+ let config = PipelineConfig {
+ num_qubits: 5,
+ batch_size: 8,
+ encoding_method: encoding_method.to_string(),
+ seed: Some(123),
+ ..Default::default()
+ };
+
+ let vector_len = vector_len(config.num_qubits, &config.encoding_method);
+
+ // Test edge cases: 0 and batch_size-1
+ for batch_idx in [0, config.batch_size - 1, 7] {
+ let generated = generate_batch(&config, batch_idx, vector_len);
+ let mut buf = vec![0.0f64; config.batch_size * vector_len];
+ fill_batch_inplace(&config, batch_idx, vector_len, &mut buf);
+
+ assert_eq!(generated, buf);
+ }
+ }
+
+ fn assert_adjacent_batches_differ(encoding_method: &str) {
+ let config = PipelineConfig {
+ num_qubits: 5,
+ batch_size: 8,
+ encoding_method: encoding_method.to_string(),
+ seed: Some(123),
+ ..Default::default()
+ };
+
+ let vector_len = vector_len(config.num_qubits, &config.encoding_method);
+
+ let batch0 = generate_batch(&config, 0, vector_len);
+ let batch1 = generate_batch(&config, 1, vector_len);
+ assert_ne!(batch0, batch1);
+ }
+
+ #[test]
+ fn generate_batch_matches_fill_batch_inplace_amplitude() {
+ assert_generate_and_inplace_match("amplitude");
+ }
+
+ #[test]
+ fn generate_batch_matches_fill_batch_inplace_angle() {
+ assert_generate_and_inplace_match("angle");
+ }
+
+ #[test]
+ fn generate_batch_matches_fill_batch_inplace_basis() {
+ assert_generate_and_inplace_match("basis");
+ }
+
+ #[test]
+ fn adjacent_batches_differ_amplitude() {
+ assert_adjacent_batches_differ("amplitude");
+ }
+
+ #[test]
+ fn adjacent_batches_differ_angle() {
+ assert_adjacent_batches_differ("angle");
+ }
+
+ #[test]
+ fn adjacent_batches_differ_basis() {
+ assert_adjacent_batches_differ("basis");
+ }
+
+ #[test]
+ fn test_seed_none() {
+ let config = PipelineConfig {
+ num_qubits: 5,
+ batch_size: 8,
+ encoding_method: "amplitude".to_string(),
+ seed: None,
+ ..Default::default()
+ };
+
+ let vector_len = vector_len(config.num_qubits, &config.encoding_method);
+ let batch = generate_batch(&config, 0, vector_len);
+ assert_eq!(batch.len(), config.batch_size * vector_len);
+
+ let mut buf = vec![0.0f64; config.batch_size * vector_len];
+ fill_batch_inplace(&config, 0, vector_len, &mut buf);
+ assert_eq!(batch, buf);
+ }
+
+ #[test]
+ fn test_batch_size_one() {
+ let config = PipelineConfig {
+ num_qubits: 5,
+ batch_size: 1,
+ encoding_method: "amplitude".to_string(),
+ seed: Some(123),
+ ..Default::default()
+ };
+
+ let vector_len = vector_len(config.num_qubits, &config.encoding_method);
+ let batch = generate_batch(&config, 0, vector_len);
+ assert_eq!(batch.len(), vector_len);
+
+ let mut buf = vec![0.0f64; vector_len];
+ fill_batch_inplace(&config, 0, vector_len, &mut buf);
+ assert_eq!(batch, buf);
+
+ let batch0 = generate_batch(&config, 0, vector_len);
+ let batch1 = generate_batch(&config, 1, vector_len);
+ assert_ne!(batch0, batch1);
+ }
+
+ #[test]
+ fn test_amplitude_encoding_case_insensitive() {
+ let config_lower = PipelineConfig {
+ num_qubits: 5,
+ batch_size: 8,
+ encoding_method: "amplitude".to_string(),
+ seed: Some(123),
+ ..Default::default()
+ };
+
+ let config_upper = PipelineConfig {
+ num_qubits: 5,
+ batch_size: 8,
+ encoding_method: "AMPLITUDE".to_string(),
+ seed: Some(123),
+ ..Default::default()
+ };
+
+ let vector_len = vector_len(config_lower.num_qubits, &config_lower.encoding_method);
+ let batch_lower = generate_batch(&config_lower, 0, vector_len);
+ let batch_upper = generate_batch(&config_upper, 0, vector_len);
+ assert_eq!(batch_lower, batch_upper);
+ }
+
+ #[test]
+ fn test_amplitude_samples_in_range() {
+ let config = PipelineConfig {
+ num_qubits: 5,
+ batch_size: 8,
+ encoding_method: "amplitude".to_string(),
+ seed: Some(123),
+ ..Default::default()
+ };
+
+ let vector_len = vector_len(config.num_qubits, &config.encoding_method);
+
+ for batch_idx in 0..5 {
+ let batch = generate_batch(&config, batch_idx, vector_len);
+ for &value in &batch {
+ assert!(
+ (0.0..1.0).contains(&value),
+ "amplitude value should be in [0, 1), got {} at batch_idx={}",
+ value,
+ batch_idx
+ );
+ }
+ }
+ }
+
+ #[test]
+ fn test_amplitude_samples_in_range_with_seed_none() {
+ let config = PipelineConfig {
+ num_qubits: 5,
+ batch_size: 8,
+ encoding_method: "amplitude".to_string(),
+ seed: None,
+ ..Default::default()
+ };
+
+ let vector_len = vector_len(config.num_qubits, &config.encoding_method);
+ let batch = generate_batch(&config, 0, vector_len);
+
+ for &value in &batch {
+ assert!(
+ (0.0..1.0).contains(&value),
+ "amplitude value should be in [0, 1) with seed=None, got {}",
+ value
+ );
+ }
+ }
+
+ #[test]
+ fn test_amplitude_samples_in_range_batch_size_one() {
+ let config = PipelineConfig {
+ num_qubits: 5,
+ batch_size: 1,
+ encoding_method: "amplitude".to_string(),
+ seed: Some(123),
+ ..Default::default()
+ };
+
+ let vector_len = vector_len(config.num_qubits, &config.encoding_method);
+ let batch = generate_batch(&config, 0, vector_len);
+
+ for &value in &batch {
+ assert!(
+ (0.0..1.0).contains(&value),
+ "amplitude value should be in [0, 1) with batch_size=1, got {}",
+ value
+ );
+ }
+ }
+}