This is an automated email from the ASF dual-hosted git repository.
richhuang pushed a commit to branch dev-qdp
in repository https://gitbox.apache.org/repos/asf/mahout.git
The following commit(s) were added to refs/heads/dev-qdp by this push:
new aa306793f [QDP] Fix DLPack device_id hardcoding (#745)
aa306793f is described below
commit aa306793f041340b5aaf2af7a11d03fd3d6de318
Author: KUAN-HAO HUANG <[email protected]>
AuthorDate: Tue Dec 23 10:51:14 2025 +0800
[QDP] Fix DLPack device_id hardcoding (#745)
* [QDP] Fix DLPack device_id hardcoding
* Update and move comment positions
* Improve the test
---
qdp/qdp-core/src/dlpack.rs | 2 +-
qdp/qdp-core/src/gpu/memory.rs | 4 ++++
qdp/qdp-core/tests/api_workflow.rs | 43 +++++++++++++++++++++++++++++++++++
qdp/qdp-core/tests/memory_safety.rs | 2 +-
qdp/qdp-python/src/lib.rs | 18 +++++++++++++--
qdp/qdp-python/tests/test_bindings.py | 39 +++++++++++++++++++++++++++++++
6 files changed, 104 insertions(+), 4 deletions(-)
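
For context, the user-visible symptom this commit removes: the DLPack metadata
exported for a GpuStateVector always carried device_id = 0, so tensors
allocated on any other GPU were mislabeled. A minimal repro sketch, assuming a
machine with at least two CUDA GPUs and the mahout_qdp wheel installed (it
mirrors the new multi-GPU test added below):

    import torch
    from mahout_qdp import QdpEngine

    engine = QdpEngine(1)                          # allocate on the second GPU
    qtensor = engine.encode([1.0, 2.0, 3.0, 4.0], 2, "amplitude")

    # Before this patch, __dlpack_device__ always returned (2, 0), so PyTorch
    # placed the imported tensor on cuda:0 while the buffer lived on cuda:1.
    assert qtensor.__dlpack_device__() == (2, 1)

    tensor = torch.from_dlpack(qtensor)
    assert tensor.is_cuda and tensor.device.index == 1
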
diff --git a/qdp/qdp-core/src/dlpack.rs b/qdp/qdp-core/src/dlpack.rs
index 883d19b37..dd134ca5d 100644
--- a/qdp/qdp-core/src/dlpack.rs
+++ b/qdp/qdp-core/src/dlpack.rs
@@ -140,7 +140,7 @@ impl GpuStateVector {
data: self.ptr_void(),
device: DLDevice {
device_type: DLDeviceType::kDLCUDA,
- device_id: 0,
+ device_id: self.device_id as c_int,
},
ndim: 1,
dtype: DLDataType {
diff --git a/qdp/qdp-core/src/gpu/memory.rs b/qdp/qdp-core/src/gpu/memory.rs
index 26e7b1383..1cfd32eca 100644
--- a/qdp/qdp-core/src/gpu/memory.rs
+++ b/qdp/qdp-core/src/gpu/memory.rs
@@ -190,6 +190,7 @@ pub struct GpuStateVector {
pub(crate) buffer: Arc<BufferStorage>,
pub num_qubits: usize,
pub size_elements: usize,
+ pub device_id: usize,
}
// Safety: CudaSlice and Arc are both Send + Sync
@@ -229,6 +230,7 @@ impl GpuStateVector {
buffer: Arc::new(BufferStorage::F64(GpuBufferRaw { slice })),
num_qubits: qubits,
size_elements: _size_elements,
+ device_id: _device.ordinal(),
})
}
@@ -300,6 +302,7 @@ impl GpuStateVector {
buffer: Arc::new(BufferStorage::F64(GpuBufferRaw { slice })),
num_qubits: qubits,
size_elements: total_elements,
+ device_id: _device.ordinal(),
})
}
@@ -364,6 +367,7 @@ impl GpuStateVector {
+ buffer: Arc::new(BufferStorage::F32(GpuBufferRaw { slice })),
num_qubits: self.num_qubits,
size_elements: self.size_elements,
+ device_id: device.ordinal(),
})
}
diff --git a/qdp/qdp-core/tests/api_workflow.rs b/qdp/qdp-core/tests/api_workflow.rs
index a1e97e31a..13c2126ec 100644
--- a/qdp/qdp-core/tests/api_workflow.rs
+++ b/qdp/qdp-core/tests/api_workflow.rs
@@ -107,3 +107,46 @@ fn test_amplitude_encoding_async_pipeline() {
println!("PASS: Memory freed successfully");
}
}
+
+#[test]
+#[cfg(target_os = "linux")]
+fn test_dlpack_device_id() {
+ println!("Testing DLPack device_id propagation...");
+
+ let engine = match QdpEngine::new(0) {
+ Ok(e) => e,
+ Err(_) => {
+ println!("SKIP: No GPU available");
+ return;
+ }
+ };
+
+ let data = common::create_test_data(16);
+ let result = engine.encode(&data, 4, "amplitude");
+ assert!(result.is_ok(), "Encoding should succeed");
+
+ let dlpack_ptr = result.unwrap();
+ assert!(!dlpack_ptr.is_null(), "DLPack pointer should not be null");
+
+ unsafe {
+ let managed = &*dlpack_ptr;
+ let tensor = &managed.dl_tensor;
+
+ // Verify device_id is correctly set (0 for device 0)
+ assert_eq!(tensor.device.device_id, 0, "device_id should be 0 for device 0");
+
+ // Verify device_type is CUDA (kDLCUDA = 2)
+ use qdp_core::dlpack::DLDeviceType;
+ match tensor.device.device_type {
+ DLDeviceType::kDLCUDA => println!("PASS: Device type is CUDA"),
+ _ => panic!("Expected CUDA device type"),
+ }
+
+ println!("PASS: DLPack device_id correctly set to {}",
tensor.device.device_id);
+
+ // Free memory
+ if let Some(deleter) = managed.deleter {
+ deleter(dlpack_ptr);
+ }
+ }
+}
diff --git a/qdp/qdp-core/tests/memory_safety.rs b/qdp/qdp-core/tests/memory_safety.rs
index 2b5fdd6e8..6aa2d355a 100644
--- a/qdp/qdp-core/tests/memory_safety.rs
+++ b/qdp/qdp-core/tests/memory_safety.rs
@@ -94,7 +94,7 @@ fn test_multiple_concurrent_states() {
fn test_dlpack_tensor_metadata_default() {
println!("Testing DLPack tensor metadata...");
- let engine = match QdpEngine::new(0) {
+ let engine = match QdpEngine::new_with_precision(0, qdp_core::Precision::Float64) {
Ok(e) => e,
Err(_) => return,
};
diff --git a/qdp/qdp-python/src/lib.rs b/qdp/qdp-python/src/lib.rs
index 04f3c5367..d94aceeb2 100644
--- a/qdp/qdp-python/src/lib.rs
+++ b/qdp/qdp-python/src/lib.rs
@@ -94,8 +94,22 @@ impl QuantumTensor {
/// Returns:
/// Tuple of (device_type, device_id) where device_type=2 for CUDA
fn __dlpack_device__(&self) -> PyResult<(i32, i32)> {
- // DLDeviceType::kDLCUDA = 2, device_id = 0
- Ok((2, 0))
+ if self.ptr.is_null() {
+ return Err(PyRuntimeError::new_err("Invalid DLPack tensor pointer"));
+ }
+
+ unsafe {
+ let tensor = &(*self.ptr).dl_tensor;
+ // device_type is an enum, convert to integer
+ // kDLCUDA = 2, kDLCPU = 1
+ // Ref: https://github.com/dmlc/dlpack/blob/6ea9b3eb64c881f614cd4537f95f0e125a35555c/include/dlpack/dlpack.h#L76-L80
+ let device_type = match tensor.device.device_type {
+ qdp_core::dlpack::DLDeviceType::kDLCUDA => 2,
+ qdp_core::dlpack::DLDeviceType::kDLCPU => 1,
+ };
+ // Read device_id from DLPack tensor metadata
+ Ok((device_type, tensor.device.device_id))
+ }
}
}
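
Why the binding change matters: DLPack consumers such as torch.from_dlpack
call __dlpack_device__ first to decide tensor placement (and which stream to
hand to __dlpack__), so the tuple must come from the tensor's real metadata
rather than a constant. A minimal sketch of that handshake on device 0,
mirroring the existing single-GPU test:

    import torch
    from mahout_qdp import QdpEngine

    qtensor = QdpEngine(0).encode([1.0, 2.0, 3.0, 4.0], 2, "amplitude")

    dev_type, dev_id = qtensor.__dlpack_device__()  # now read from the DLPack struct
    assert (dev_type, dev_id) == (2, 0)             # kDLCUDA = 2, ordinal 0

    tensor = torch.from_dlpack(qtensor)             # torch trusts this tuple
    assert tensor.device == torch.device("cuda", dev_id)
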
diff --git a/qdp/qdp-python/tests/test_bindings.py b/qdp/qdp-python/tests/test_bindings.py
index 1fc586f78..d3cda3e22 100644
--- a/qdp/qdp-python/tests/test_bindings.py
+++ b/qdp/qdp-python/tests/test_bindings.py
@@ -20,6 +20,16 @@ import pytest
import mahout_qdp
+def _has_multi_gpu():
+ """Check if multiple GPUs are available via PyTorch."""
+ try:
+ import torch
+
+ return torch.cuda.is_available() and torch.cuda.device_count() >= 2
+ except ImportError:
+ return False
+
+
def test_import():
"""Test that PyO3 bindings are properly imported."""
assert hasattr(mahout_qdp, "QdpEngine")
@@ -50,6 +60,35 @@ def test_dlpack_device():
assert device_info == (2, 0), "Expected (2, 0) for CUDA device 0"
+@pytest.mark.gpu
+@pytest.mark.skipif(
+ not _has_multi_gpu(), reason="Multi-GPU setup required for this test"
+)
+def test_dlpack_device_id_non_zero():
+ """Test device_id propagation for non-zero devices (requires multi-GPU)."""
+ pytest.importorskip("torch")
+ import torch
+ from mahout_qdp import QdpEngine
+
+ # Test with device_id=1 (second GPU)
+ device_id = 1
+ engine = QdpEngine(device_id)
+ data = [1.0, 2.0, 3.0, 4.0]
+ qtensor = engine.encode(data, 2, "amplitude")
+
+ device_info = qtensor.__dlpack_device__()
+ assert device_info == (2, device_id), (
+ f"Expected (2, {device_id}) for CUDA device {device_id}"
+ )
+
+ # Verify PyTorch integration works with non-zero device_id
+ torch_tensor = torch.from_dlpack(qtensor)
+ assert torch_tensor.is_cuda
+ assert torch_tensor.device.index == device_id, (
+ f"PyTorch tensor should be on device {device_id}"
+ )
+
+
@pytest.mark.gpu
def test_dlpack_single_use():
"""Test that __dlpack__ can only be called once (requires GPU)."""