(mahout) branch main updated: refactor: standardize qdp-core GPU integration tests (#1172)

hcr Thu, 12 Mar 2026 00:25:01 -0700

This is an automated email from the ASF dual-hosted git repository.

hcr pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git



The following commit(s) were added to refs/heads/main by this push:
     new 2524716d4 refactor: standardize qdp-core GPU integration tests (#1172)
2524716d4 is described below

commit 2524716d462a1b499cbcf9c5cc2511690138458c
Author: Vic Wen <[email protected]>
AuthorDate: Thu Mar 12 15:24:49 2026 +0800

    refactor: standardize qdp-core GPU integration tests (#1172)
    
    * refactor: add shared CUDA init helper
    
    * refactor: streamline DLPack memory management in tests
    
    * refactor: rename testing files with gpu prefix
    
    * docs: update docs
---
 docs/qdp/testing.md                                |   6 +-
 qdp/docs/test/README.md                            |   6 +-
 qdp/qdp-core/tests/common/mod.rs                   |  99 ++++
 .../tests/{api_workflow.rs => gpu_api_workflow.rs} | 144 ++---
 qdp/qdp-core/tests/{dlpack.rs => gpu_dlpack.rs}    | 100 ++--
 .../tests/{iqp_encoding.rs => gpu_iqp_encoding.rs} | 155 ++----
 .../{memory_safety.rs => gpu_memory_safety.rs}     |  51 +-
 qdp/qdp-core/tests/gpu_norm_f32.rs                 |  30 +-
 qdp/qdp-core/tests/gpu_ptr_encoding.rs             | 601 +++++++--------------
 .../tests/{validation.rs => gpu_validation.rs}     |  37 +-
 10 files changed, 455 insertions(+), 774 deletions(-)

diff --git a/docs/qdp/testing.md b/docs/qdp/testing.md
index ce7d659d9..b5b35a2d6 100644
--- a/docs/qdp/testing.md
+++ b/docs/qdp/testing.md
@@ -47,9 +47,9 @@ Unit tests for QDP core library covering input validation, API workflows, and me
 cargo test --package qdp-core
 
 # Run specific test file
-cargo test --package qdp-core --test validation
-cargo test --package qdp-core --test api_workflow
-cargo test --package qdp-core --test memory_safety
+cargo test --package qdp-core --test gpu_validation
+cargo test --package qdp-core --test gpu_api_workflow
+cargo test --package qdp-core --test gpu_memory_safety
 ```
 
 ## Requirements
diff --git a/qdp/docs/test/README.md b/qdp/docs/test/README.md
index 1c24ba783..15445045c 100644
--- a/qdp/docs/test/README.md
+++ b/qdp/docs/test/README.md
@@ -42,9 +42,9 @@ Unit tests for QDP core library covering input validation, API workflows, and me
 cargo test --package qdp-core
 
 # Run specific test file
-cargo test --package qdp-core --test validation
-cargo test --package qdp-core --test api_workflow
-cargo test --package qdp-core --test memory_safety
+cargo test --package qdp-core --test gpu_validation
+cargo test --package qdp-core --test gpu_api_workflow
+cargo test --package qdp-core --test gpu_memory_safety
 ```
 
 ## Requirements
diff --git a/qdp/qdp-core/tests/common/mod.rs b/qdp/qdp-core/tests/common/mod.rs
index 25e43c262..5eb128f4a 100644
--- a/qdp/qdp-core/tests/common/mod.rs
+++ b/qdp/qdp-core/tests/common/mod.rs
@@ -14,6 +14,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#[cfg(target_os = "linux")]
+use std::sync::Arc;
+
+#[cfg(target_os = "linux")]
+use cudarc::driver::{CudaDevice, CudaSlice};
+#[cfg(target_os = "linux")]
+use qdp_core::dlpack::DLManagedTensor;
+#[cfg(target_os = "linux")]
+use qdp_core::{Precision, QdpEngine};
+
 /// Creates normalized test data (f64)
 #[allow(dead_code)] // Used by multiple test modules
 pub fn create_test_data(size: usize) -> Vec<f64> {
@@ -25,3 +35,92 @@ pub fn create_test_data(size: usize) -> Vec<f64> {
 pub fn create_test_data_f32(size: usize) -> Vec<f32> {
     (0..size).map(|i| (i as f32) / (size as f32)).collect()
 }
+
+/// Returns a CUDA device handle, or `None` when CUDA is unavailable for the test environment.
+#[cfg(target_os = "linux")]
+#[allow(dead_code)]
+pub fn cuda_device() -> Option<Arc<CudaDevice>> {
+    CudaDevice::new(0).ok()
+}
+
+/// Returns a QDP engine, or `None` when GPU-backed engine initialization is unavailable.
+#[cfg(target_os = "linux")]
+#[allow(dead_code)]
+pub fn qdp_engine() -> Option<QdpEngine> {
+    QdpEngine::new(0).ok()
+}
+
+/// Returns a QDP engine with the requested precision, or `None` when unavailable.
+#[cfg(target_os = "linux")]
+#[allow(dead_code)]
+pub fn qdp_engine_with_precision(precision: Precision) -> Option<QdpEngine> {
+    QdpEngine::new_with_precision(0, precision).ok()
+}
+
+/// Copies f64 host data to the default CUDA device, or returns `None` when unavailable.
+#[cfg(target_os = "linux")]
+#[allow(dead_code)]
+pub fn copy_f64_to_device(data: &[f64]) -> Option<(Arc<CudaDevice>, CudaSlice<f64>)> {
+    let device = cuda_device()?;
+    let slice = device.htod_sync_copy(data).ok()?;
+    Some((device, slice))
+}
+
+/// Copies f32 host data to the default CUDA device, or returns `None` when unavailable.
+#[cfg(target_os = "linux")]
+#[allow(dead_code)]
+pub fn copy_f32_to_device(data: &[f32]) -> Option<(Arc<CudaDevice>, CudaSlice<f32>)> {
+    let device = cuda_device()?;
+    let slice = device.htod_sync_copy(data).ok()?;
+    Some((device, slice))
+}
+
+/// Copies usize host data to the default CUDA device, or returns `None` when unavailable.
+#[cfg(target_os = "linux")]
+#[allow(dead_code)]
+pub fn copy_usize_to_device(data: &[usize]) -> Option<(Arc<CudaDevice>, CudaSlice<usize>)> {
+    let device = cuda_device()?;
+    let slice = device.htod_sync_copy(data).ok()?;
+    Some((device, slice))
+}
+
+/// Asserts a DLPack tensor is 2D with the expected shape.
+#[cfg(target_os = "linux")]
+#[allow(dead_code)]
+pub unsafe fn assert_dlpack_shape_2d(dlpack_ptr: *mut DLManagedTensor, dim0: i64, dim1: i64) {
+    assert!(!dlpack_ptr.is_null(), "DLPack pointer should not be null");
+
+    let tensor = unsafe { &(*dlpack_ptr).dl_tensor };
+    assert_eq!(tensor.ndim, 2, "DLPack tensor should be 2D");
+
+    let shape = unsafe { std::slice::from_raw_parts(tensor.shape, 2) };
+    assert_eq!(shape[0], dim0, "Unexpected first dimension");
+    assert_eq!(shape[1], dim1, "Unexpected second dimension");
+}
+
+/// Asserts a DLPack tensor is 2D with the expected shape and then frees it via its deleter.
+#[cfg(target_os = "linux")]
+#[allow(dead_code)]
+pub unsafe fn assert_dlpack_shape_2d_and_delete(
+    dlpack_ptr: *mut DLManagedTensor,
+    dim0: i64,
+    dim1: i64,
+) {
+    unsafe { assert_dlpack_shape_2d(dlpack_ptr, dim0, dim1) };
+
+    unsafe { take_deleter_and_delete(dlpack_ptr) };
+}
+
+/// Takes the DLPack deleter from the managed tensor and invokes it exactly once.
+#[cfg(target_os = "linux")]
+#[allow(dead_code)]
+pub unsafe fn take_deleter_and_delete(dlpack_ptr: *mut DLManagedTensor) {
+    assert!(!dlpack_ptr.is_null(), "DLPack pointer should not be null");
+
+    let managed = unsafe { &mut *dlpack_ptr };
+    let deleter = managed
+        .deleter
+        .take()
+        .expect("DLPack deleter should be present");
+    unsafe { deleter(dlpack_ptr) };
+}
diff --git a/qdp/qdp-core/tests/api_workflow.rs b/qdp/qdp-core/tests/gpu_api_workflow.rs
similarity index 68%
rename from qdp/qdp-core/tests/api_workflow.rs
rename to qdp/qdp-core/tests/gpu_api_workflow.rs
index bc94d4e34..6c8e65166 100644
--- a/qdp/qdp-core/tests/api_workflow.rs
+++ b/qdp/qdp-core/tests/gpu_api_workflow.rs
@@ -16,8 +16,6 @@
 
 // API workflow tests: Engine initialization and encoding
 
-#[cfg(target_os = "linux")]
-use cudarc::driver::CudaDevice;
 #[cfg(target_os = "linux")]
 use qdp_core::MahoutError;
 use qdp_core::QdpEngine;
@@ -52,12 +50,9 @@ fn test_engine_initialization() {
 fn test_amplitude_encoding_workflow() {
     println!("Testing amplitude encoding workflow...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let data = common::create_test_data(1024);
@@ -70,15 +65,8 @@ fn test_amplitude_encoding_workflow() {
 
     // Simulate PyTorch behavior: manually call deleter to free GPU memory
     unsafe {
-        let managed = &mut *dlpack_ptr;
-        assert!(managed.deleter.is_some(), "Deleter must be present");
-
         println!("Calling deleter to free GPU memory");
-        let deleter = managed
-            .deleter
-            .take()
-            .expect("Deleter function pointer is missing!");
-        deleter(dlpack_ptr);
+        common::take_deleter_and_delete(dlpack_ptr);
         println!("PASS: Memory freed successfully");
     }
 }
@@ -88,12 +76,9 @@ fn test_amplitude_encoding_workflow() {
 fn test_amplitude_encoding_async_pipeline() {
     println!("Testing amplitude encoding async pipeline path...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     // Use 200000 elements to trigger async pipeline path (ASYNC_THRESHOLD = 131072)
@@ -106,15 +91,8 @@ fn test_amplitude_encoding_async_pipeline() {
     println!("PASS: Encoding succeeded, DLPack pointer valid");
 
     unsafe {
-        let managed = &mut *dlpack_ptr;
-        assert!(managed.deleter.is_some(), "Deleter must be present");
-
         println!("Calling deleter to free GPU memory");
-        let deleter = managed
-            .deleter
-            .take()
-            .expect("Deleter function pointer is missing!");
-        deleter(dlpack_ptr);
+        common::take_deleter_and_delete(dlpack_ptr);
         println!("PASS: Memory freed successfully");
     }
 }
@@ -124,12 +102,9 @@ fn test_amplitude_encoding_async_pipeline() {
 fn test_angle_encoding_async_pipeline() {
     println!("Testing angle encoding async pipeline path...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let num_qubits = 4;
@@ -143,15 +118,8 @@ fn test_angle_encoding_async_pipeline() {
     println!("PASS: Angle batch encoding succeeded, DLPack pointer valid");
 
     unsafe {
-        let managed = &mut *dlpack_ptr;
-        assert!(managed.deleter.is_some(), "Deleter must be present");
-
         println!("Calling deleter to free GPU memory");
-        let deleter = managed
-            .deleter
-            .take()
-            .expect("Deleter function pointer is missing!");
-        deleter(dlpack_ptr);
+        common::take_deleter_and_delete(dlpack_ptr);
         println!("PASS: Memory freed successfully");
     }
 }
@@ -161,12 +129,9 @@ fn test_angle_encoding_async_pipeline() {
 fn test_angle_async_alignment_error() {
     println!("Testing angle async pipeline alignment error...");
 
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(device) = common::cuda_device() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let misaligned_data = vec![0.0_f64; 10];
@@ -192,12 +157,9 @@ fn test_angle_async_alignment_error() {
 fn test_batch_dlpack_2d_shape() {
     println!("Testing batch DLPack 2D shape...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     // Create batch data: 3 samples, each with 4 elements (2 qubits)
@@ -216,25 +178,12 @@ fn test_batch_dlpack_2d_shape() {
         "amplitude",
     );
     let dlpack_ptr = result.expect("Batch encoding should succeed");
-    assert!(!dlpack_ptr.is_null(), "DLPack pointer should not be null");
 
     unsafe {
-        let managed = &*dlpack_ptr;
+        let managed = &mut *dlpack_ptr;
         let tensor = &managed.dl_tensor;
 
-        // Verify 2D shape for batch tensor
-        assert_eq!(tensor.ndim, 2, "Batch tensor should be 2D");
-
-        let shape_slice = std::slice::from_raw_parts(tensor.shape, tensor.ndim as usize);
-        assert_eq!(
-            shape_slice[0], num_samples as i64,
-            "First dimension should be num_samples"
-        );
-        assert_eq!(
-            shape_slice[1],
-            (1 << num_qubits) as i64,
-            "Second dimension should be 2^num_qubits"
-        );
+        common::assert_dlpack_shape_2d(dlpack_ptr, num_samples as i64, (1 << num_qubits) as i64);
 
         let strides_slice = std::slice::from_raw_parts(tensor.strides, tensor.ndim as usize);
         let state_len = 1 << num_qubits;
@@ -249,17 +198,15 @@ fn test_batch_dlpack_2d_shape() {
 
         println!(
             "PASS: Batch DLPack tensor has correct 2D shape: [{}, {}]",
-            shape_slice[0], shape_slice[1]
+            num_samples,
+            1 << num_qubits
         );
         println!(
             "PASS: Strides are correct: [{}, {}]",
             strides_slice[0], strides_slice[1]
         );
 
-        // Free memory
-        if let Some(deleter) = managed.deleter {
-            deleter(dlpack_ptr);
-        }
+        common::take_deleter_and_delete(dlpack_ptr);
     }
 }
 
@@ -268,12 +215,9 @@ fn test_batch_dlpack_2d_shape() {
 fn test_single_encode_dlpack_2d_shape() {
     println!("Testing single encode returns 2D shape...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let data = common::create_test_data(16);
@@ -281,21 +225,12 @@ fn test_single_encode_dlpack_2d_shape() {
     assert!(result.is_ok(), "Encoding should succeed");
 
     let dlpack_ptr = result.unwrap();
-    assert!(!dlpack_ptr.is_null(), "DLPack pointer should not be null");
 
     unsafe {
-        let managed = &*dlpack_ptr;
+        let managed = &mut *dlpack_ptr;
         let tensor = &managed.dl_tensor;
 
-        // Verify 2D shape for single encode: [1, 2^num_qubits]
-        assert_eq!(tensor.ndim, 2, "Single encode should be 2D");
-
-        let shape_slice = std::slice::from_raw_parts(tensor.shape, tensor.ndim as usize);
-        assert_eq!(
-            shape_slice[0], 1,
-            "First dimension should be 1 for single encode"
-        );
-        assert_eq!(shape_slice[1], 16, "Second dimension should be [2^4]");
+        common::assert_dlpack_shape_2d(dlpack_ptr, 1, 16);
 
         let strides_slice = std::slice::from_raw_parts(tensor.strides, tensor.ndim as usize);
         assert_eq!(
@@ -307,15 +242,9 @@ fn test_single_encode_dlpack_2d_shape() {
             "Stride for second dimension should be 1"
         );
 
-        println!(
-            "PASS: Single encode returns 2D shape: [{}, {}]",
-            shape_slice[0], shape_slice[1]
-        );
+        println!("PASS: Single encode returns 2D shape: [{}, {}]", 1, 16);
 
-        // Free memory
-        if let Some(deleter) = managed.deleter {
-            deleter(dlpack_ptr);
-        }
+        common::take_deleter_and_delete(dlpack_ptr);
     }
 }
 
@@ -324,12 +253,9 @@ fn test_single_encode_dlpack_2d_shape() {
 fn test_dlpack_device_id() {
     println!("Testing DLPack device_id propagation...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let data = common::create_test_data(16);
@@ -362,8 +288,6 @@ fn test_dlpack_device_id() {
         );
 
         // Free memory
-        if let Some(deleter) = managed.deleter {
-            deleter(dlpack_ptr);
-        }
+        common::take_deleter_and_delete(dlpack_ptr);
     }
 }
diff --git a/qdp/qdp-core/tests/dlpack.rs b/qdp/qdp-core/tests/gpu_dlpack.rs
similarity index 76%
rename from qdp/qdp-core/tests/dlpack.rs
rename to qdp/qdp-core/tests/gpu_dlpack.rs
index 29cfc74ce..f84bcd38e 100644
--- a/qdp/qdp-core/tests/dlpack.rs
+++ b/qdp/qdp-core/tests/gpu_dlpack.rs
@@ -16,11 +16,14 @@
 
 // DLPack protocol for zero-copy GPU memory sharing with PyTorch
 
+#[path = "common/mod.rs"]
+mod common;
+
 #[cfg(test)]
 mod dlpack_tests {
     use std::ffi::c_void;
 
-    use cudarc::driver::CudaDevice;
+    use super::common;
     use qdp_core::MahoutError;
     use qdp_core::Precision;
     use qdp_core::dlpack::{
@@ -31,7 +34,9 @@ mod dlpack_tests {
 
     #[test]
     fn test_dlpack_batch_shape() {
-        let device = CudaDevice::new(0).unwrap();
+        let Some(device) = common::cuda_device() else {
+            return;
+        };
 
         let num_samples = 4;
         let num_qubits = 2; // 2^2 = 4 elements per sample
@@ -40,62 +45,37 @@ mod dlpack_tests {
                 .expect("Failed to create batch state vector");
 
         let dlpack_ptr = state_vector.to_dlpack();
-        assert!(!dlpack_ptr.is_null());
-
         unsafe {
-            let tensor = &(*dlpack_ptr).dl_tensor;
-
-            // Verify ndim is 2
-            assert_eq!(tensor.ndim, 2, "DLPack tensor should be 2D for batch");
-
-            // Verify shape
-            let shape = std::slice::from_raw_parts(tensor.shape, 2);
-            assert_eq!(shape[0], num_samples as i64, "Batch size mismatch");
-            assert_eq!(shape[1], (1 << num_qubits) as i64, "State size mismatch");
-
-            // Clean up using the deleter
-            if let Some(deleter) = (*dlpack_ptr).deleter {
-                deleter(dlpack_ptr);
-            }
-        }
+            common::assert_dlpack_shape_2d_and_delete(
+                dlpack_ptr,
+                num_samples as i64,
+                (1 << num_qubits) as i64,
+            )
+        };
     }
 
     #[test]
     fn test_dlpack_single_shape() {
-        let device = CudaDevice::new(0).unwrap();
+        let Some(device) = common::cuda_device() else {
+            return;
+        };
 
         let num_qubits = 2;
         let state_vector = GpuStateVector::new(&device, num_qubits, Precision::Float64)
             .expect("Failed to create state vector");
 
         let dlpack_ptr = state_vector.to_dlpack();
-        assert!(!dlpack_ptr.is_null());
-
         unsafe {
-            let tensor = &(*dlpack_ptr).dl_tensor;
-
-            // Verify ndim is 2 (even for single sample, per the fix)
-            assert_eq!(
-                tensor.ndim, 2,
-                "DLPack tensor should be 2D for single sample"
-            );
-
-            // Verify shape
-            let shape = std::slice::from_raw_parts(tensor.shape, 2);
-            assert_eq!(shape[0], 1, "Batch size should be 1 for single sample");
-            assert_eq!(shape[1], (1 << num_qubits) as i64, "State size mismatch");
-
-            // Clean up using the deleter
-            if let Some(deleter) = (*dlpack_ptr).deleter {
-                deleter(dlpack_ptr);
-            }
-        }
+            common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, (1 << num_qubits) as i64)
+        };
     }
 
     #[test]
     #[cfg(target_os = "linux")]
     fn test_dlpack_single_shape_f32() {
-        let device = CudaDevice::new(0).unwrap();
+        let Some(device) = common::cuda_device() else {
+            return;
+        };
 
         let num_qubits = 2;
         let state_vector = GpuStateVector::new(&device, num_qubits, Precision::Float32)
@@ -111,24 +91,17 @@ mod dlpack_tests {
         );
 
         let dlpack_ptr = state_vector.to_dlpack();
-        assert!(!dlpack_ptr.is_null());
-
         unsafe {
-            let tensor = &(*dlpack_ptr).dl_tensor;
-            assert_eq!(tensor.ndim, 2, "DLPack tensor should be 2D");
-            let shape = std::slice::from_raw_parts(tensor.shape, 2);
-            assert_eq!(shape[0], 1);
-            assert_eq!(shape[1], (1 << num_qubits) as i64);
-            if let Some(deleter) = (*dlpack_ptr).deleter {
-                deleter(dlpack_ptr);
-            }
-        }
+            common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, (1 << num_qubits) as i64)
+        };
     }
 
     #[test]
     #[cfg(target_os = "linux")]
     fn test_dlpack_batch_shape_f32() {
-        let device = CudaDevice::new(0).unwrap();
+        let Some(device) = common::cuda_device() else {
+            return;
+        };
 
         let num_samples = 3;
         let num_qubits = 2;
@@ -146,18 +119,13 @@ mod dlpack_tests {
         );
 
         let dlpack_ptr = state_vector.to_dlpack();
-        assert!(!dlpack_ptr.is_null());
-
         unsafe {
-            let tensor = &(*dlpack_ptr).dl_tensor;
-            assert_eq!(tensor.ndim, 2, "DLPack tensor should be 2D");
-            let shape = std::slice::from_raw_parts(tensor.shape, 2);
-            assert_eq!(shape[0], num_samples as i64);
-            assert_eq!(shape[1], (1 << num_qubits) as i64);
-            if let Some(deleter) = (*dlpack_ptr).deleter {
-                deleter(dlpack_ptr);
-            }
-        }
+            common::assert_dlpack_shape_2d_and_delete(
+                dlpack_ptr,
+                num_samples as i64,
+                (1 << num_qubits) as i64,
+            )
+        };
     }
 
     /// synchronize_stream(null) is a no-op and returns Ok(()) on all platforms.
@@ -176,6 +144,10 @@ mod dlpack_tests {
     #[test]
     #[cfg(target_os = "linux")]
     fn test_synchronize_stream_legacy() {
+        if common::cuda_device().is_none() {
+            return;
+        }
+
         unsafe {
             let result = synchronize_stream(CUDA_STREAM_LEGACY);
             assert!(
diff --git a/qdp/qdp-core/tests/iqp_encoding.rs b/qdp/qdp-core/tests/gpu_iqp_encoding.rs
similarity index 89%
rename from qdp/qdp-core/tests/iqp_encoding.rs
rename to qdp/qdp-core/tests/gpu_iqp_encoding.rs
index 7f976d0ad..6ca8e987a 100644
--- a/qdp/qdp-core/tests/iqp_encoding.rs
+++ b/qdp/qdp-core/tests/gpu_iqp_encoding.rs
@@ -16,7 +16,7 @@
 
 // Unit tests for IQP (Instantaneous Quantum Polynomial) encoding
 
-use qdp_core::{MahoutError, QdpEngine};
+use qdp_core::MahoutError;
 
 mod common;
 
@@ -39,9 +39,8 @@ fn iqp_z_data_len(num_qubits: usize) -> usize {
 fn test_iqp_zero_qubits_rejected() {
     println!("Testing IQP zero qubits rejection...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let data = vec![0.5; 1];
@@ -65,9 +64,8 @@ fn test_iqp_zero_qubits_rejected() {
 fn test_iqp_max_qubits_exceeded() {
     println!("Testing IQP max qubits (>30) rejection...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let data = vec![0.5; iqp_full_data_len(31)];
@@ -88,9 +86,8 @@ fn test_iqp_max_qubits_exceeded() {
 fn test_iqp_wrong_data_length() {
     println!("Testing IQP wrong data length rejection...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let num_qubits = 4;
@@ -126,9 +123,8 @@ fn test_iqp_wrong_data_length() {
 fn test_iqp_z_wrong_data_length() {
     println!("Testing IQP-Z wrong data length rejection...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let num_qubits = 4;
@@ -156,9 +152,8 @@ fn test_iqp_z_wrong_data_length() {
 fn test_iqp_nan_value_rejected() {
     println!("Testing IQP NaN value rejection...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let num_qubits = 3;
@@ -185,9 +180,8 @@ fn test_iqp_nan_value_rejected() {
 fn test_iqp_infinity_value_rejected() {
     println!("Testing IQP infinity value rejection...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let num_qubits = 3;
@@ -218,12 +212,9 @@ fn test_iqp_infinity_value_rejected() {
 fn test_iqp_full_encoding_workflow() {
     println!("Testing IQP full encoding workflow...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let num_qubits = 4;
@@ -267,12 +258,9 @@ fn test_iqp_full_encoding_workflow() {
 fn test_iqp_z_encoding_workflow() {
     println!("Testing IQP-Z encoding workflow...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let num_qubits = 5;
@@ -315,12 +303,9 @@ fn test_iqp_z_encoding_workflow() {
 fn test_iqp_single_qubit() {
     println!("Testing IQP single qubit encoding...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     // Single qubit IQP full: 1 parameter (no ZZ terms with only 1 qubit)
@@ -358,12 +343,9 @@ fn test_iqp_single_qubit() {
 fn test_iqp_batch_encoding() {
     println!("Testing IQP batch encoding...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let num_qubits = 3;
@@ -411,12 +393,9 @@ fn test_iqp_batch_encoding() {
 fn test_iqp_z_batch_encoding() {
     println!("Testing IQP-Z batch encoding...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let num_qubits = 4;
@@ -464,9 +443,8 @@ fn test_iqp_z_batch_encoding() {
 fn test_iqp_batch_wrong_sample_size() {
     println!("Testing IQP batch wrong sample_size rejection...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let num_qubits = 3;
@@ -501,9 +479,8 @@ fn test_iqp_batch_wrong_sample_size() {
 fn test_iqp_batch_data_length_mismatch() {
     println!("Testing IQP batch data length mismatch rejection...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let num_qubits = 3;
@@ -533,9 +510,8 @@ fn test_iqp_batch_data_length_mismatch() {
 fn test_iqp_batch_nan_in_sample() {
     println!("Testing IQP batch NaN value rejection...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let num_qubits = 3;
@@ -594,12 +570,9 @@ fn test_iqp_data_length_calculations() {
 fn test_iqp_fwt_threshold_boundary() {
     println!("Testing IQP FWT threshold boundary (n=4, where FWT kicks in)...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     // Test at FWT_MIN_QUBITS threshold (n=4)
@@ -640,12 +613,9 @@ fn test_iqp_fwt_threshold_boundary() {
 fn test_iqp_fwt_larger_qubit_counts() {
     println!("Testing IQP FWT with larger qubit counts (n=5,6,7,8)...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     for num_qubits in [5, 6, 7, 8] {
@@ -689,12 +659,9 @@ fn test_iqp_fwt_larger_qubit_counts() {
 fn test_iqp_z_fwt_correctness() {
     println!("Testing IQP-Z FWT correctness for various qubit counts...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     // Test IQP-Z across FWT threshold
@@ -734,12 +701,9 @@ fn test_iqp_z_fwt_correctness() {
 fn test_iqp_fwt_batch_various_sizes() {
     println!("Testing IQP FWT batch encoding with various qubit counts...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     // Test batch encoding across FWT threshold
@@ -787,12 +751,9 @@ fn test_iqp_fwt_batch_various_sizes() {
 fn test_iqp_fwt_zero_parameters_identity() {
     println!("Testing IQP FWT with zero parameters produces |0⟩ state...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     // For FWT-optimized path (n >= 4), zero parameters should still give |0⟩
@@ -833,12 +794,9 @@ fn test_iqp_fwt_zero_parameters_identity() {
 fn test_iqp_encoder_via_factory() {
     println!("Testing IQP encoder creation via get_encoder...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     // Test that "iqp" and "IQP" work (case insensitive)
@@ -873,12 +831,9 @@ fn test_iqp_encoder_via_factory() {
 fn test_iqp_z_encoder_via_factory() {
     println!("Testing IQP-Z encoder creation via get_encoder...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let num_qubits = 3;
diff --git a/qdp/qdp-core/tests/memory_safety.rs b/qdp/qdp-core/tests/gpu_memory_safety.rs
similarity index 82%
rename from qdp/qdp-core/tests/memory_safety.rs
rename to qdp/qdp-core/tests/gpu_memory_safety.rs
index 4b6c9aa97..33e937a37 100644
--- a/qdp/qdp-core/tests/memory_safety.rs
+++ b/qdp/qdp-core/tests/gpu_memory_safety.rs
@@ -16,7 +16,7 @@
 
 // Memory safety tests: DLPack lifecycle, RAII, Arc reference counting
 
-use qdp_core::{Precision, QdpEngine};
+use qdp_core::Precision;
 
 mod common;
 
@@ -26,12 +26,9 @@ fn test_memory_pressure() {
     println!("Testing memory pressure (leak detection)");
     println!("Running 100 iterations of encode + free");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let data = common::create_test_data(1024);
@@ -42,12 +39,7 @@ fn test_memory_pressure() {
             .expect("Encoding should succeed");
 
         unsafe {
-            let managed = &mut *ptr;
-            let deleter = managed
-                .deleter
-                .take()
-                .expect("Deleter missing in pressure test!");
-            deleter(ptr);
+            common::take_deleter_and_delete(ptr);
         }
 
         if (i + 1) % 25 == 0 {
@@ -63,9 +55,8 @@ fn test_memory_pressure() {
 fn test_multiple_concurrent_states() {
     println!("Testing multiple concurrent state vectors...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let data1 = common::create_test_data(256);
@@ -81,9 +72,9 @@ fn test_multiple_concurrent_states() {
     // Free in different order to test Arc reference counting
     unsafe {
         println!("Freeing in order: 2, 1, 3");
-        (&mut *ptr2).deleter.take().expect("Deleter missing!")(ptr2);
-        (&mut *ptr1).deleter.take().expect("Deleter missing!")(ptr1);
-        (&mut *ptr3).deleter.take().expect("Deleter missing!")(ptr3);
+        common::take_deleter_and_delete(ptr2);
+        common::take_deleter_and_delete(ptr1);
+        common::take_deleter_and_delete(ptr3);
     }
 
     println!("PASS: All states freed successfully");
@@ -94,9 +85,8 @@ fn test_multiple_concurrent_states() {
 fn test_dlpack_tensor_metadata_default() {
     println!("Testing DLPack tensor metadata...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let data = common::create_test_data(1024);
@@ -136,11 +126,7 @@ fn test_dlpack_tensor_metadata_default() {
             tensor.dtype.code, tensor.dtype.bits
         );
 
-        let deleter = managed
-            .deleter
-            .take()
-            .expect("Deleter missing in metadata test!");
-        deleter(ptr);
+        common::take_deleter_and_delete(ptr);
     }
 }
 
@@ -149,9 +135,8 @@ fn test_dlpack_tensor_metadata_default() {
 fn test_dlpack_tensor_metadata_f64() {
     println!("Testing DLPack tensor metadata...");
 
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        return;
     };
 
     let data = common::create_test_data(1024);
@@ -192,10 +177,6 @@ fn test_dlpack_tensor_metadata_f64() {
             tensor.dtype.code, tensor.dtype.bits
         );
 
-        let deleter = managed
-            .deleter
-            .take()
-            .expect("Deleter missing in metadata test!");
-        deleter(ptr);
+        common::take_deleter_and_delete(ptr);
     }
 }
diff --git a/qdp/qdp-core/tests/gpu_norm_f32.rs b/qdp/qdp-core/tests/gpu_norm_f32.rs
index 40be53efe..af1150881 100644
--- a/qdp/qdp-core/tests/gpu_norm_f32.rs
+++ b/qdp/qdp-core/tests/gpu_norm_f32.rs
@@ -21,27 +21,24 @@
 #![cfg(target_os = "linux")]
 
 use approx::assert_relative_eq;
-use cudarc::driver::{CudaDevice, DevicePtr};
+use cudarc::driver::DevicePtr;
 use qdp_core::gpu::encodings::amplitude::AmplitudeEncoder;
 
+mod common;
+
 #[test]
 fn test_calculate_inv_norm_gpu_f32_basic() {
     println!("Testing AmplitudeEncoder::calculate_inv_norm_gpu_f32 (basic case)...");
 
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => {
-            println!("SKIP: No CUDA device available");
-            return;
-        }
-    };
-
     // Input: [3.0, 4.0] -> norm = 5.0, inv_norm = 0.2
     let input: Vec<f32> = vec![3.0, 4.0];
     let expected_norm = (3.0_f32.powi(2) + 4.0_f32.powi(2)).sqrt();
     let expected_inv_norm = 1.0_f32 / expected_norm;
 
-    let input_d = device.htod_sync_copy(input.as_slice()).unwrap();
+    let Some((device, input_d)) = common::copy_f32_to_device(input.as_slice()) else {
+        println!("SKIP: No CUDA device available");
+        return;
+    };
     let inv = unsafe {
         AmplitudeEncoder::calculate_inv_norm_gpu_f32(
             &device,
@@ -58,16 +55,11 @@ fn test_calculate_inv_norm_gpu_f32_basic() {
 fn test_calculate_inv_norm_gpu_f32_invalid_zero() {
     println!("Testing AmplitudeEncoder::calculate_inv_norm_gpu_f32 with zero vector...");
 
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => {
-            println!("SKIP: No CUDA device available");
-            return;
-        }
-    };
-
     let input: Vec<f32> = vec![0.0, 0.0, 0.0];
-    let input_d = device.htod_sync_copy(input.as_slice()).unwrap();
+    let Some((device, input_d)) = common::copy_f32_to_device(input.as_slice()) else {
+        println!("SKIP: No CUDA device available");
+        return;
+    };
 
     let result = unsafe {
         AmplitudeEncoder::calculate_inv_norm_gpu_f32(
diff --git a/qdp/qdp-core/tests/gpu_ptr_encoding.rs b/qdp/qdp-core/tests/gpu_ptr_encoding.rs
index ef7ffb9d5..470402727 100644
--- a/qdp/qdp-core/tests/gpu_ptr_encoding.rs
+++ b/qdp/qdp-core/tests/gpu_ptr_encoding.rs
@@ -18,11 +18,9 @@
 
 #![cfg(target_os = "linux")]
 
-use std::ffi::c_void;
-use std::sync::Arc;
-
-use cudarc::driver::{CudaDevice, CudaSlice, DevicePtr, DeviceSlice};
+use cudarc::driver::{DevicePtr, DeviceSlice};
 use qdp_core::{MahoutError, Precision, QdpEngine};
+use std::ffi::c_void;
 
 mod common;
 
@@ -39,65 +37,21 @@ fn iqp_z_data_len(num_qubits: usize) -> usize {
 // ---- Helpers for f32 encode_from_gpu_ptr_f32 tests ----
 
 fn engine_f32() -> Option<QdpEngine> {
-    QdpEngine::new_with_precision(0, Precision::Float32).ok()
-}
-
-fn device_and_f32_slice(data: &[f32]) -> Option<(Arc<CudaDevice>, CudaSlice<f32>)> {
-    let device = CudaDevice::new(0).ok()?;
-    let slice = device.htod_sync_copy(data).ok()?;
-    Some((device, slice))
-}
-
-fn assert_dlpack_shape_2_4_and_delete(dlpack_ptr: *mut qdp_core::dlpack::DLManagedTensor) {
-    assert!(!dlpack_ptr.is_null());
-    unsafe {
-        let tensor = &(*dlpack_ptr).dl_tensor;
-        assert_eq!(tensor.ndim, 2);
-        let shape = std::slice::from_raw_parts(tensor.shape, 2);
-        assert_eq!(shape[0], 1);
-        assert_eq!(shape[1], 4);
-        if let Some(deleter) = (*dlpack_ptr).deleter {
-            deleter(dlpack_ptr);
-        }
-    }
-}
-
-fn assert_dlpack_batch_shape_and_delete(
-    dlpack_ptr: *mut qdp_core::dlpack::DLManagedTensor,
-    num_samples: i64,
-    state_len: i64,
-) {
-    assert!(!dlpack_ptr.is_null());
-    unsafe {
-        let tensor = &(*dlpack_ptr).dl_tensor;
-        assert_eq!(tensor.ndim, 2);
-        let shape = std::slice::from_raw_parts(tensor.shape, 2);
-        assert_eq!(shape[0], num_samples);
-        assert_eq!(shape[1], state_len);
-        if let Some(deleter) = (*dlpack_ptr).deleter {
-            deleter(dlpack_ptr);
-        }
-    }
+    common::qdp_engine_with_precision(Precision::Float32)
 }
 
 // ---- Validation / error-path tests (return before using pointer) ----
 
 #[test]
 fn test_encode_from_gpu_ptr_unknown_method() {
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     // Need valid GPU pointer so we reach method dispatch (validation runs first)
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => return,
-    };
     let data = common::create_test_data(4);
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => return,
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        return;
     };
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
 
@@ -117,9 +71,8 @@ fn test_encode_from_gpu_ptr_unknown_method() {
 
 #[test]
 fn test_encode_from_gpu_ptr_amplitude_empty_input() {
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let result = unsafe { engine.encode_from_gpu_ptr(std::ptr::null(), 0, 2, "amplitude") };
@@ -135,20 +88,14 @@ fn test_encode_from_gpu_ptr_amplitude_empty_input() {
 
 #[test]
 fn test_encode_from_gpu_ptr_amplitude_input_exceeds_state() {
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     // Need valid GPU pointer so we reach input_len > state_len check (validation runs first)
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => return,
-    };
     let data = common::create_test_data(10);
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => return,
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        return;
     };
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
 
@@ -166,20 +113,14 @@ fn test_encode_from_gpu_ptr_amplitude_input_exceeds_state() {
 
 #[test]
 fn test_encode_batch_from_gpu_ptr_unknown_method() {
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     // Need valid GPU pointer so we reach method dispatch (validation runs first)
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => return,
-    };
     let data = common::create_test_data(8);
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => return,
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        return;
     };
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
 
@@ -199,9 +140,8 @@ fn test_encode_batch_from_gpu_ptr_unknown_method() {
 
 #[test]
 fn test_encode_batch_from_gpu_ptr_amplitude_num_samples_zero() {
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let result =
@@ -218,20 +158,14 @@ fn test_encode_batch_from_gpu_ptr_amplitude_num_samples_zero() {
 
 #[test]
 fn test_encode_from_gpu_ptr_basis_input_len_not_one() {
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     // Need valid GPU pointer so we reach basis input_len checks (validation runs first)
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => return,
-    };
     let indices: Vec<usize> = vec![0, 1, 2];
-    let indices_d = match device.htod_sync_copy(indices.as_slice()) {
-        Ok(b) => b,
-        Err(_) => return,
+    let Some((_device, indices_d)) = common::copy_usize_to_device(indices.as_slice()) else {
+        return;
     };
     let ptr = *indices_d.device_ptr() as *const usize as *const c_void;
 
@@ -262,20 +196,14 @@ fn test_encode_from_gpu_ptr_basis_input_len_not_one() {
 
 #[test]
 fn test_encode_batch_from_gpu_ptr_basis_sample_size_not_one() {
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     // Need valid GPU pointer so we reach basis sample_size check (validation runs first)
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => return,
-    };
     let indices: Vec<usize> = vec![0, 1];
-    let indices_d = match device.htod_sync_copy(indices.as_slice()) {
-        Ok(b) => b,
-        Err(_) => return,
+    let Some((_device, indices_d)) = common::copy_usize_to_device(indices.as_slice()) else {
+        return;
     };
     let ptr = *indices_d.device_ptr() as *const usize as *const c_void;
 
@@ -297,32 +225,18 @@ fn test_encode_batch_from_gpu_ptr_basis_sample_size_not_one() {
 
 #[test]
 fn test_encode_from_gpu_ptr_amplitude_success() {
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let num_qubits = 4;
     let state_len = 1 << num_qubits;
     let data = common::create_test_data(state_len);
 
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => {
-            println!("SKIP: No CUDA device");
-            return;
-        }
-    };
-
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => {
-            println!("SKIP: Failed to copy to device");
-            return;
-        }
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        println!("SKIP: Failed to copy to device");
+        return;
     };
 
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
@@ -336,44 +250,24 @@ fn test_encode_from_gpu_ptr_amplitude_success() {
     assert!(!dlpack_ptr.is_null(), "DLPack pointer should not be null");
 
     unsafe {
-        let managed = &mut *dlpack_ptr;
-        assert!(managed.deleter.is_some(), "Deleter must be present");
-        let deleter = managed
-            .deleter
-            .take()
-            .expect("Deleter function pointer is missing");
-        deleter(dlpack_ptr);
+        common::take_deleter_and_delete(dlpack_ptr);
     }
 }
 
 #[test]
 fn test_encode_from_gpu_ptr_with_stream_amplitude_success() {
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let num_qubits = 3;
     let state_len = 1 << num_qubits;
     let data = common::create_test_data(state_len);
 
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => {
-            println!("SKIP: No CUDA device");
-            return;
-        }
-    };
-
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => {
-            println!("SKIP: Failed to copy to device");
-            return;
-        }
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        println!("SKIP: Failed to copy to device");
+        return;
     };
 
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
@@ -393,20 +287,15 @@ fn test_encode_from_gpu_ptr_with_stream_amplitude_success() {
     assert!(!dlpack_ptr.is_null());
 
     unsafe {
-        let managed = &mut *dlpack_ptr;
-        let deleter = managed.deleter.take().expect("Deleter missing");
-        deleter(dlpack_ptr);
+        common::take_deleter_and_delete(dlpack_ptr);
     }
 }
 
 #[test]
 fn test_encode_batch_from_gpu_ptr_amplitude_success() {
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine() else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let num_qubits = 3;
@@ -416,20 +305,9 @@ fn test_encode_batch_from_gpu_ptr_amplitude_success() {
     let total = num_samples * sample_size;
     let data = common::create_test_data(total);
 
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => {
-            println!("SKIP: No CUDA device");
-            return;
-        }
-    };
-
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => {
-            println!("SKIP: Failed to copy to device");
-            return;
-        }
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        println!("SKIP: Failed to copy to device");
+        return;
     };
 
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
@@ -443,41 +321,25 @@ fn test_encode_batch_from_gpu_ptr_amplitude_success() {
     assert!(!dlpack_ptr.is_null());
 
     unsafe {
-        let managed = &mut *dlpack_ptr;
-        let deleter = managed.deleter.take().expect("Deleter missing");
-        deleter(dlpack_ptr);
+        common::take_deleter_and_delete(dlpack_ptr);
     }
 }
 
 #[test]
 fn test_encode_from_gpu_ptr_basis_success() {
     // Basis path uses ptr_f64(); engine must be Float64
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let num_qubits = 3;
     let basis_index: usize = 0;
 
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => {
-            println!("SKIP: No CUDA device");
-            return;
-        }
-    };
-
     let indices: Vec<usize> = vec![basis_index];
-    let indices_d = match device.htod_sync_copy(indices.as_slice()) {
-        Ok(b) => b,
-        Err(_) => {
-            println!("SKIP: Failed to copy to device");
-            return;
-        }
+    let Some((_device, indices_d)) = common::copy_usize_to_device(indices.as_slice()) else {
+        println!("SKIP: Failed to copy to device");
+        return;
     };
 
     let ptr = *indices_d.device_ptr() as *const usize as *const c_void;
@@ -491,25 +353,16 @@ fn test_encode_from_gpu_ptr_basis_success() {
     assert!(!dlpack_ptr.is_null());
 
     unsafe {
-        let managed = &mut *dlpack_ptr;
-        assert!(managed.deleter.is_some(), "Deleter must be present");
-        let deleter = managed
-            .deleter
-            .take()
-            .expect("Deleter function pointer is missing");
-        deleter(dlpack_ptr);
+        common::take_deleter_and_delete(dlpack_ptr);
     }
 }
 
 #[test]
 fn test_encode_batch_from_gpu_ptr_basis_success() {
     // Basis path uses ptr_f64(); engine must be Float64
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let num_qubits = 3;
@@ -518,20 +371,9 @@ fn test_encode_batch_from_gpu_ptr_basis_success() {
     let state_len = 1 << num_qubits;
     let basis_indices: Vec<usize> = (0..num_samples).map(|i| i % state_len).collect();
 
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => {
-            println!("SKIP: No CUDA device");
-            return;
-        }
-    };
-
-    let indices_d = match device.htod_sync_copy(basis_indices.as_slice()) {
-        Ok(b) => b,
-        Err(_) => {
-            println!("SKIP: Failed to copy to device");
-            return;
-        }
+    let Some((_device, indices_d)) = common::copy_usize_to_device(basis_indices.as_slice()) else {
+        println!("SKIP: Failed to copy to device");
+        return;
     };
 
     let ptr = *indices_d.device_ptr() as *const usize as *const c_void;
@@ -545,17 +387,14 @@ fn test_encode_batch_from_gpu_ptr_basis_success() {
     assert!(!dlpack_ptr.is_null());
 
     unsafe {
-        let managed = &mut *dlpack_ptr;
-        let deleter = managed.deleter.take().expect("Deleter missing");
-        deleter(dlpack_ptr);
+        common::take_deleter_and_delete(dlpack_ptr);
     }
 }
 
 #[test]
 fn test_encode_batch_from_gpu_ptr_iqp_success() {
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        return;
     };
     let num_qubits = 2;
     let state_len = 1 << num_qubits;
@@ -563,13 +402,8 @@ fn test_encode_batch_from_gpu_ptr_iqp_success() {
     let num_samples = 3;
     let total = num_samples * sample_size;
     let data: Vec<f64> = (0..total).map(|i| (i as f64) * 0.05).collect();
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => return,
-    };
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => return,
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        return;
     };
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
     let dlpack_ptr = unsafe {
@@ -578,14 +412,15 @@ fn test_encode_batch_from_gpu_ptr_iqp_success() {
             .expect("encode_batch_from_gpu_ptr iqp should succeed")
     };
     assert!(!dlpack_ptr.is_null());
-    assert_dlpack_batch_shape_and_delete(dlpack_ptr, num_samples as i64, state_len as i64);
+    unsafe {
+        common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, num_samples as i64, state_len as i64)
+    };
 }
 
 #[test]
 fn test_encode_batch_from_gpu_ptr_iqp_z_success() {
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        return;
     };
     let num_qubits = 2;
     let state_len = 1 << num_qubits;
@@ -593,13 +428,8 @@ fn test_encode_batch_from_gpu_ptr_iqp_z_success() {
     let num_samples = 3;
     let total = num_samples * sample_size;
     let data: Vec<f64> = (0..total).map(|i| (i as f64) * 0.05).collect();
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => return,
-    };
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => return,
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        return;
     };
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
     let dlpack_ptr = unsafe {
@@ -608,27 +438,23 @@ fn test_encode_batch_from_gpu_ptr_iqp_z_success() {
             .expect("encode_batch_from_gpu_ptr iqp-z should succeed")
     };
     assert!(!dlpack_ptr.is_null());
-    assert_dlpack_batch_shape_and_delete(dlpack_ptr, num_samples as i64, state_len as i64);
+    unsafe {
+        common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, num_samples as i64, state_len as i64)
+    };
 }
 
 #[test]
 fn test_encode_batch_from_gpu_ptr_iqp_wrong_sample_size() {
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        return;
     };
     let num_qubits = 2;
     let expected_sample_size = iqp_full_data_len(num_qubits);
     let wrong_sample_size = expected_sample_size + 1;
     let num_samples = 2;
     let data = vec![0.1_f64; num_samples * wrong_sample_size];
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => return,
-    };
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => return,
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        return;
     };
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
     let result = unsafe {
@@ -649,22 +475,16 @@ fn test_encode_batch_from_gpu_ptr_iqp_wrong_sample_size() {
 
 #[test]
 fn test_encode_batch_from_gpu_ptr_iqp_z_wrong_sample_size() {
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        return;
     };
     let num_qubits = 2;
     let expected_sample_size = iqp_z_data_len(num_qubits);
     let wrong_sample_size = expected_sample_size + 1;
     let num_samples = 2;
     let data = vec![0.1_f64; num_samples * wrong_sample_size];
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => return,
-    };
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => return,
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        return;
     };
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
     let result = unsafe {
@@ -685,31 +505,17 @@ fn test_encode_batch_from_gpu_ptr_iqp_z_wrong_sample_size() {
 
 #[test]
 fn test_encode_from_gpu_ptr_iqp_z_success() {
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let num_qubits = 2;
     let data = [0.1_f64, -0.2_f64];
 
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => {
-            println!("SKIP: No CUDA device");
-            return;
-        }
-    };
-
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => {
-            println!("SKIP: Failed to copy to device");
-            return;
-        }
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        println!("SKIP: Failed to copy to device");
+        return;
     };
 
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
@@ -719,36 +525,22 @@ fn test_encode_from_gpu_ptr_iqp_z_success() {
             .expect("encode_from_gpu_ptr iqp-z should succeed")
     };
 
-    assert_dlpack_shape_2_4_and_delete(dlpack_ptr);
+    unsafe { common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, 4) };
 }
 
 #[test]
 fn test_encode_from_gpu_ptr_iqp_success() {
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
-        Ok(e) => e,
-        Err(_) => {
-            println!("SKIP: No GPU available");
-            return;
-        }
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        println!("SKIP: No GPU available");
+        return;
     };
 
     let num_qubits = 2;
     let data = [0.1_f64, -0.2_f64, 0.3_f64];
 
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => {
-            println!("SKIP: No CUDA device");
-            return;
-        }
-    };
-
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => {
-            println!("SKIP: Failed to copy to device");
-            return;
-        }
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        println!("SKIP: Failed to copy to device");
+        return;
     };
 
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
@@ -758,25 +550,19 @@ fn test_encode_from_gpu_ptr_iqp_success() {
             .expect("encode_from_gpu_ptr iqp should succeed")
     };
 
-    assert_dlpack_shape_2_4_and_delete(dlpack_ptr);
+    unsafe { common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, 4) };
 }
 
 #[test]
 fn test_encode_from_gpu_ptr_iqp_wrong_input_len() {
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        return;
     };
     let num_qubits = 2;
     let expected_len = iqp_full_data_len(num_qubits);
     let data = vec![0.1_f64; expected_len];
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => return,
-    };
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => return,
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        return;
     };
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
 
@@ -797,20 +583,14 @@ fn test_encode_from_gpu_ptr_iqp_wrong_input_len() {
 
 #[test]
 fn test_encode_from_gpu_ptr_iqp_z_wrong_input_len() {
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        return;
     };
     let num_qubits = 2;
     let expected_len = iqp_z_data_len(num_qubits);
     let data = vec![0.1_f64; expected_len];
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => return,
-    };
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => return,
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        return;
     };
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
 
@@ -826,19 +606,13 @@ fn test_encode_from_gpu_ptr_iqp_z_wrong_input_len() {
 
 #[test]
 fn test_encode_from_gpu_ptr_with_stream_iqp_success() {
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        return;
     };
     let num_qubits = 2;
     let data = [0.1_f64, -0.2_f64, 0.3_f64];
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => return,
-    };
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => return,
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        return;
     };
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
     let dlpack_ptr = unsafe {
@@ -852,24 +626,18 @@ fn test_encode_from_gpu_ptr_with_stream_iqp_success() {
             )
             .expect("encode_from_gpu_ptr_with_stream iqp")
     };
-    assert_dlpack_shape_2_4_and_delete(dlpack_ptr);
+    unsafe { common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, 4) };
 }
 
 #[test]
 fn test_encode_from_gpu_ptr_with_stream_iqp_z_success() {
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        return;
     };
     let num_qubits = 2;
     let data = [0.1_f64, -0.2_f64];
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => return,
-    };
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => return,
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        return;
     };
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
     let dlpack_ptr = unsafe {
@@ -883,26 +651,20 @@ fn test_encode_from_gpu_ptr_with_stream_iqp_z_success() {
             )
             .expect("encode_from_gpu_ptr_with_stream iqp-z")
     };
-    assert_dlpack_shape_2_4_and_delete(dlpack_ptr);
+    unsafe { common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, 4) };
 }
 
 #[test]
 fn test_encode_from_gpu_ptr_iqp_three_qubits() {
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        return;
     };
     let num_qubits = 3;
     let state_len = 1 << num_qubits;
     let expected_len = iqp_full_data_len(num_qubits);
     let data: Vec<f64> = (0..expected_len).map(|i| (i as f64) * 0.1).collect();
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => return,
-    };
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => return,
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        return;
     };
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
     let dlpack_ptr = unsafe {
@@ -925,21 +687,15 @@ fn test_encode_from_gpu_ptr_iqp_three_qubits() {
 
 #[test]
 fn test_encode_from_gpu_ptr_iqp_z_three_qubits() {
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        return;
     };
     let num_qubits = 3;
     let state_len = 1 << num_qubits;
     let expected_len = iqp_z_data_len(num_qubits);
     let data: Vec<f64> = (0..expected_len).map(|i| (i as f64) * 0.1).collect();
-    let device = match CudaDevice::new(0) {
-        Ok(d) => d,
-        Err(_) => return,
-    };
-    let data_d = match device.htod_sync_copy(data.as_slice()) {
-        Ok(b) => b,
-        Err(_) => return,
+    let Some((_device, data_d)) = common::copy_f64_to_device(data.as_slice()) else {
+        return;
     };
     let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
     let dlpack_ptr = unsafe {
@@ -971,7 +727,7 @@ fn test_encode_from_gpu_ptr_f32_success() {
             return;
         }
     };
-    let (_device, input_d) = match device_and_f32_slice(&[1.0, 0.0, 0.0, 0.0]) {
+    let (_device, input_d) = match common::copy_f32_to_device(&[1.0, 0.0, 0.0, 0.0]) {
         Some(t) => t,
         None => {
             println!("SKIP: No CUDA device");
@@ -984,7 +740,7 @@ fn test_encode_from_gpu_ptr_f32_success() {
             .encode_from_gpu_ptr_f32(ptr, input_d.len(), 2)
             .expect("encode_from_gpu_ptr_f32")
     };
-    assert_dlpack_shape_2_4_and_delete(dlpack_ptr);
+    unsafe { common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, 4) };
 }
 
 #[test]
@@ -996,7 +752,7 @@ fn test_encode_from_gpu_ptr_f32_with_stream_success() {
             return;
         }
     };
-    let (_device, input_d) = match device_and_f32_slice(&[1.0, 0.0, 0.0, 0.0]) {
+    let (_device, input_d) = match common::copy_f32_to_device(&[1.0, 0.0, 0.0, 0.0]) {
         Some(t) => t,
         None => {
             println!("SKIP: No CUDA device");
@@ -1008,7 +764,7 @@ fn test_encode_from_gpu_ptr_f32_with_stream_success() {
         engine.encode_from_gpu_ptr_f32_with_stream(ptr, input_d.len(), 2, std::ptr::null_mut())
     }
     .expect("encode_from_gpu_ptr_f32_with_stream");
-    assert_dlpack_shape_2_4_and_delete(dlpack_ptr);
+    unsafe { common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, 4) };
 }
 
 #[test]
@@ -1020,7 +776,7 @@ fn test_encode_from_gpu_ptr_f32_with_stream_non_default_success() {
             return;
         }
     };
-    let (device, input_d) = match device_and_f32_slice(&[1.0, 0.0, 0.0, 0.0]) {
+    let (device, input_d) = match common::copy_f32_to_device(&[1.0, 0.0, 0.0, 0.0]) {
         Some(t) => t,
         None => {
             println!("SKIP: No CUDA device");
@@ -1038,19 +794,16 @@ fn test_encode_from_gpu_ptr_f32_with_stream_non_default_success() {
             )
             .expect("encode_from_gpu_ptr_f32_with_stream (non-default stream)")
     };
-    assert_dlpack_shape_2_4_and_delete(dlpack_ptr);
+    unsafe { common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, 4) };
 }
 
 #[test]
 fn test_encode_from_gpu_ptr_f32_success_f64_engine() {
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64).ok() {
-        Some(e) => e,
-        None => {
-            println!("SKIP: No GPU");
-            return;
-        }
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        println!("SKIP: No GPU");
+        return;
     };
-    let (_device, input_d) = match device_and_f32_slice(&[1.0, 0.0, 0.0, 0.0]) {
+    let (_device, input_d) = match common::copy_f32_to_device(&[1.0, 0.0, 0.0, 0.0]) {
         Some(t) => t,
         None => {
             println!("SKIP: No CUDA device");
@@ -1063,7 +816,7 @@ fn test_encode_from_gpu_ptr_f32_success_f64_engine() {
             .encode_from_gpu_ptr_f32(ptr, input_d.len(), 2)
             .expect("encode_from_gpu_ptr_f32 (Float64 engine)")
     };
-    assert_dlpack_shape_2_4_and_delete(dlpack_ptr);
+    unsafe { common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, 4) };
 }
 
 #[test]
@@ -1075,7 +828,7 @@ fn test_encode_from_gpu_ptr_f32_empty_input() {
             return;
         }
     };
-    let (_device, input_d) = match device_and_f32_slice(&[1.0]) {
+    let (_device, input_d) = match common::copy_f32_to_device(&[1.0]) {
         Some(t) => t,
         None => {
             println!("SKIP: No CUDA device");
@@ -1117,7 +870,7 @@ fn test_encode_from_gpu_ptr_f32_input_exceeds_state_len() {
             return;
         }
     };
-    let (_device, input_d) = match device_and_f32_slice(&[1.0, 0.0, 0.0, 0.0, 0.0]) {
+    let (_device, input_d) = match common::copy_f32_to_device(&[1.0, 0.0, 0.0, 0.0, 0.0]) {
         Some(t) => t,
         None => {
             println!("SKIP: No CUDA device");
@@ -1150,13 +903,14 @@ fn test_encode_batch_from_gpu_ptr_f32_success() {
     };
     let num_samples = 2;
     let sample_size = 4;
-    let (_device, input_d) = match device_and_f32_slice(&[1.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5]) {
-        Some(t) => t,
-        None => {
-            println!("SKIP: No CUDA device");
-            return;
-        }
-    };
+    let (_device, input_d) =
+        match common::copy_f32_to_device(&[1.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5]) {
+            Some(t) => t,
+            None => {
+                println!("SKIP: No CUDA device");
+                return;
+            }
+        };
     let dlpack_ptr = unsafe {
         engine
             .encode_batch_from_gpu_ptr_f32(
@@ -1167,7 +921,13 @@ fn test_encode_batch_from_gpu_ptr_f32_success() {
             )
             .expect("encode_batch_from_gpu_ptr_f32")
     };
-    assert_dlpack_batch_shape_and_delete(dlpack_ptr, num_samples as i64, sample_size as i64);
+    unsafe {
+        common::assert_dlpack_shape_2d_and_delete(
+            dlpack_ptr,
+            num_samples as i64,
+            sample_size as i64,
+        )
+    };
 }
 
 #[test]
@@ -1179,13 +939,14 @@ fn test_encode_batch_from_gpu_ptr_f32_with_stream_success() {
             return;
         }
     };
-    let (device, input_d) = match device_and_f32_slice(&[1.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5]) {
-        Some(t) => t,
-        None => {
-            println!("SKIP: No CUDA device");
-            return;
-        }
-    };
+    let (device, input_d) =
+        match common::copy_f32_to_device(&[1.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5]) {
+            Some(t) => t,
+            None => {
+                println!("SKIP: No CUDA device");
+                return;
+            }
+        };
     let stream = device.fork_default_stream().expect("fork_default_stream");
     let dlpack_ptr = unsafe {
         engine
@@ -1198,31 +959,29 @@ fn test_encode_batch_from_gpu_ptr_f32_with_stream_success() {
             )
             .expect("encode_batch_from_gpu_ptr_f32_with_stream")
     };
-    assert_dlpack_batch_shape_and_delete(dlpack_ptr, 2, 4);
+    unsafe { common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 2, 4) };
 }
 
 #[test]
 fn test_encode_batch_from_gpu_ptr_f32_success_f64_engine() {
-    let engine = match QdpEngine::new_with_precision(0, Precision::Float64).ok() {
-        Some(e) => e,
-        None => {
-            println!("SKIP: No GPU");
-            return;
-        }
-    };
-    let (_device, input_d) = match device_and_f32_slice(&[1.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5]) {
-        Some(t) => t,
-        None => {
-            println!("SKIP: No CUDA device");
-            return;
-        }
-    };
+    let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else {
+        println!("SKIP: No GPU");
+        return;
+    };
+    let (_device, input_d) =
+        match common::copy_f32_to_device(&[1.0, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5]) {
+            Some(t) => t,
+            None => {
+                println!("SKIP: No CUDA device");
+                return;
+            }
+        };
     let dlpack_ptr = unsafe {
         engine
             .encode_batch_from_gpu_ptr_f32(*input_d.device_ptr() as *const f32, 2, 4, 2)
             .expect("encode_batch_from_gpu_ptr_f32 (Float64 engine)")
     };
-    assert_dlpack_batch_shape_and_delete(dlpack_ptr, 2, 4);
+    unsafe { common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 2, 4) };
 }
 
 #[test]
@@ -1268,7 +1027,7 @@ fn test_encode_batch_from_gpu_ptr_f32_sample_size_exceeds_state_len() {
             return;
         }
     };
-    let (_device, input_d) = match device_and_f32_slice(&[1.0; 10]) {
+    let (_device, input_d) = match common::copy_f32_to_device(&[1.0; 10]) {
         Some(t) => t,
         None => {
             println!("SKIP: No CUDA device");
@@ -1299,7 +1058,7 @@ fn test_encode_batch_from_gpu_ptr_f32_odd_sample_size_success() {
     let num_samples = 2;
     let sample_size = 3;
     let num_qubits = 2;
-    let (_device, input_d) = match device_and_f32_slice(&[1.0, 2.0, 2.0, 2.0, 1.0, 2.0]) {
+    let (_device, input_d) = match common::copy_f32_to_device(&[1.0, 2.0, 2.0, 2.0, 1.0, 2.0]) {
         Some(t) => t,
         None => {
             println!("SKIP: No CUDA device");
@@ -1316,5 +1075,11 @@ fn test_encode_batch_from_gpu_ptr_f32_odd_sample_size_success() {
             )
             .expect("encode_batch_from_gpu_ptr_f32 odd sample size")
     };
-    assert_dlpack_batch_shape_and_delete(dlpack_ptr, num_samples as i64, (1 << num_qubits) as i64);
+    unsafe {
+        common::assert_dlpack_shape_2d_and_delete(
+            dlpack_ptr,
+            num_samples as i64,
+            (1 << num_qubits) as i64,
+        )
+    };
 }
diff --git a/qdp/qdp-core/tests/validation.rs b/qdp/qdp-core/tests/gpu_validation.rs
similarity index 90%
rename from qdp/qdp-core/tests/validation.rs
rename to qdp/qdp-core/tests/gpu_validation.rs
index 7ac25eaf2..d8779f3fc 100644
--- a/qdp/qdp-core/tests/validation.rs
+++ b/qdp/qdp-core/tests/gpu_validation.rs
@@ -16,7 +16,7 @@
 
 // Input validation and error handling tests
 
-use qdp_core::{MahoutError, QdpEngine};
+use qdp_core::MahoutError;
 
 mod common;
 
@@ -25,9 +25,8 @@ mod common;
 fn test_input_validation_invalid_strategy() {
     println!("Testing invalid strategy name rejection...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let data = common::create_test_data(100);
@@ -52,9 +51,8 @@ fn test_input_validation_invalid_strategy() {
 fn test_input_validation_qubit_mismatch() {
     println!("Testing qubit size validation...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let data = common::create_test_data(100);
@@ -83,9 +81,8 @@ fn test_input_validation_qubit_mismatch() {
 fn test_input_validation_zero_qubits() {
     println!("Testing zero qubits rejection...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let data = common::create_test_data(10);
@@ -110,9 +107,8 @@ fn test_input_validation_zero_qubits() {
 fn test_input_validation_max_qubits() {
     println!("Testing maximum qubit limit (30)...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let data = common::create_test_data(100);
@@ -137,9 +133,8 @@ fn test_input_validation_max_qubits() {
 fn test_input_validation_batch_zero_samples() {
     println!("Testing zero num_samples rejection...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let batch_data = vec![1.0, 2.0, 3.0, 4.0];
@@ -163,9 +158,8 @@ fn test_input_validation_batch_zero_samples() {
 fn test_empty_data() {
     println!("Testing empty data rejection...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let data: Vec<f64> = vec![];
@@ -187,9 +181,8 @@ fn test_empty_data() {
 fn test_zero_norm_data() {
     println!("Testing zero-norm data rejection...");
 
-    let engine = match QdpEngine::new(0) {
-        Ok(e) => e,
-        Err(_) => return,
+    let Some(engine) = common::qdp_engine() else {
+        return;
     };
 
     let data = vec![0.0; 128];

(mahout) branch main updated: refactor: standardize qdp-core GPU integration tests (#1172)

Reply via email to