Re: [PR] feat: support CUDA tensor GPU-pointer encoding for iqp and iqp-z [mahout]

via GitHub Fri, 06 Mar 2026 19:40:47 -0800


Copilot commented on code in PR #1115:
URL: https://github.com/apache/mahout/pull/1115#discussion_r2898867545



##########
qdp/qdp-core/tests/gpu_ptr_encoding.rs:
##########
@@ -541,6 +541,84 @@ fn test_encode_batch_from_gpu_ptr_basis_success() {
     }
 }
 
+#[test]
+fn test_encode_from_gpu_ptr_iqp_z_success() {
+    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
+        Ok(e) => e,
+        Err(_) => {
+            println!("SKIP: No GPU available");
+            return;
+        }
+    };
+
+    let num_qubits = 2;
+    let data = [0.1_f64, -0.2_f64];
+
+    let device = match CudaDevice::new(0) {
+        Ok(d) => d,
+        Err(_) => {
+            println!("SKIP: No CUDA device");
+            return;
+        }
+    };
+
+    let data_d = match device.htod_sync_copy(data.as_slice()) {
+        Ok(b) => b,
+        Err(_) => {
+            println!("SKIP: Failed to copy to device");
+            return;
+        }
+    };
+
+    let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
+    let dlpack_ptr = unsafe {
+        engine
+            .encode_from_gpu_ptr(ptr, data.len(), num_qubits, "iqp-z")
+            .expect("encode_from_gpu_ptr iqp-z should succeed")
+    };
+
+    assert_dlpack_shape_2_4_and_delete(dlpack_ptr);
+}
+
+#[test]
+fn test_encode_from_gpu_ptr_iqp_success() {
+    let engine = match QdpEngine::new_with_precision(0, Precision::Float64) {
+        Ok(e) => e,
+        Err(_) => {
+            println!("SKIP: No GPU available");
+            return;
+        }
+    };
+
+    let num_qubits = 2;
+    let data = [0.1_f64, -0.2_f64, 0.3_f64];
+
+    let device = match CudaDevice::new(0) {
+        Ok(d) => d,
+        Err(_) => {
+            println!("SKIP: No CUDA device");
+            return;
+        }
+    };
+
+    let data_d = match device.htod_sync_copy(data.as_slice()) {
+        Ok(b) => b,
+        Err(_) => {
+            println!("SKIP: Failed to copy to device");
+            return;
+        }
+    };
+
+    let ptr = *data_d.device_ptr() as *const f64 as *const c_void;
+    let dlpack_ptr = unsafe {
+        engine
+            .encode_from_gpu_ptr(ptr, data.len(), num_qubits, "iqp")
+            .expect("encode_from_gpu_ptr iqp should succeed")
+    };
+
+    assert_dlpack_shape_2_4_and_delete(dlpack_ptr);
+}

Review Comment:
   The PR adds `encode_batch_from_gpu_ptr(...)` support for IQP/IQP-Z, but 
`gpu_ptr_encoding.rs` only adds happy-path coverage for the single-sample 
`encode_from_gpu_ptr` entry point. Please add at least one regression test that 
exercises `encode_batch_from_gpu_ptr` for `iqp` and/or `iqp-z` (including a 
basic shape assertion) so batch GPU-pointer encoding is covered.



##########
qdp/qdp-core/src/gpu/encodings/iqp.rs:
##########
@@ -241,6 +241,147 @@ impl QuantumEncoder for IqpEncoder {
         Ok(batch_state_vector)
     }
 
+    #[cfg(target_os = "linux")]
+    unsafe fn encode_from_gpu_ptr(
+        &self,
+        device: &Arc<CudaDevice>,
+        input_d: *const c_void,
+        input_len: usize,
+        num_qubits: usize,
+        stream: *mut c_void,
+    ) -> Result<GpuStateVector> {
+        if num_qubits == 0 || num_qubits > 30 {
+            return Err(MahoutError::InvalidInput(format!(
+                "Number of qubits {} must be between 1 and 30",
+                num_qubits
+            )));
+        }
+
+        let expected_len = self.expected_data_len(num_qubits);
+        if input_len != expected_len {
+            return Err(MahoutError::InvalidInput(format!(
+                "IQP{} encoding expects {} values for {} qubits, got {}",
+                if self.enable_zz { "" } else { "-Z" },
+                expected_len,
+                num_qubits,
+                input_len
+            )));
+        }
+
+        let state_len = 1 << num_qubits;
+        let state_vector = {
+            crate::profile_scope!("GPU::Alloc");
+            GpuStateVector::new(device, num_qubits, Precision::Float64)?
+        };
+
+        let state_ptr = state_vector.ptr_f64().ok_or_else(|| {
+            MahoutError::InvalidInput(
+                "State vector precision mismatch (expected float64 buffer)".to_string(),
+            )
+        })?;
+
+        let ret = {
+            crate::profile_scope!("GPU::KernelLaunch");
+            unsafe {
+                qdp_kernels::launch_iqp_encode(
+                    input_d as *const f64,
+                    state_ptr as *mut c_void,
+                    state_len,
+                    num_qubits as u32,
+                    if self.enable_zz { 1 } else { 0 },
+                    stream,
+                )
+            }
+        };
+
+        if ret != 0 {
+            return Err(MahoutError::KernelLaunch(format!(
+                "IQP encoding kernel failed with CUDA error code: {} ({})",
+                ret,
+                cuda_error_to_string(ret)
+            )));
+        }
+
+        {
+            crate::profile_scope!("GPU::Synchronize");
+            crate::gpu::cuda_sync::sync_cuda_stream(stream, "CUDA stream synchronize failed")?;
+        }
+
+        Ok(state_vector)
+    }
+
+    #[cfg(target_os = "linux")]
+    unsafe fn encode_batch_from_gpu_ptr(
+        &self,
+        device: &Arc<CudaDevice>,
+        input_batch_d: *const c_void,
+        num_samples: usize,
+        sample_size: usize,
+        num_qubits: usize,
+        stream: *mut c_void,
+    ) -> Result<GpuStateVector> {
+        let expected_len = self.expected_data_len(num_qubits);
+        if sample_size != expected_len {
+            return Err(MahoutError::InvalidInput(format!(
+                "IQP{} encoding expects sample_size={} for {} qubits, got {}",
+                if self.enable_zz { "" } else { "-Z" },
+                expected_len,
+                num_qubits,
+                sample_size
+            )));
+        }
+
+        if num_qubits == 0 || num_qubits > 30 {
+            return Err(MahoutError::InvalidInput(format!(
+                "Number of qubits {} must be between 1 and 30",

Review Comment:
   In `encode_batch_from_gpu_ptr`, `expected_data_len(num_qubits)` is computed 
before validating `num_qubits`. For `enable_zz=true` and `num_qubits=0`, 
`num_qubits - 1` underflows inside `expected_data_len` and can panic in debug 
builds / wrap in release, producing a bogus `expected_len`. Validate 
`num_qubits` before calling `expected_data_len`, or make `expected_data_len` 
handle `num_qubits < 2` without subtraction underflow (e.g., via 
checked/saturating math).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] feat: support CUDA tensor GPU-pointer encoding for iqp and iqp-z [mahout]

Reply via email to