This is an automated email from the ASF dual-hosted git repository.

guanmingchiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git

commit e3bc0c12e4ee1b3a6ebca671370fa960bfd376bf
Author: KUAN-HAO HUANG <[email protected]>
AuthorDate: Mon Dec 1 01:19:47 2025 +0800

    [QDP] improve memory initialization (#668)
---
 qdp/qdp-core/src/gpu/memory.rs | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/qdp/qdp-core/src/gpu/memory.rs b/qdp/qdp-core/src/gpu/memory.rs
index 38bb90e27..49f26602a 100644
--- a/qdp/qdp-core/src/gpu/memory.rs
+++ b/qdp/qdp-core/src/gpu/memory.rs
@@ -57,21 +57,17 @@ impl GpuStateVector {
     pub fn new(_device: &Arc<CudaDevice>, qubits: usize) -> Result<Self> {
         let _size_elements = 1 << qubits;
 
-        // Use device-side allocation (critical for performance):
-        // - No CPU RAM usage (avoids OOM for large states)
-        // - Fast: microseconds vs seconds for 30 qubits (16GB)
-        // TODO: Use uninitialized alloc() when kernel fully implements padding
         #[cfg(target_os = "linux")]
         {
-            // Allocate GPU memory (zero-initialized)
-            let zeros = vec![CuDoubleComplex { x: 0.0, y: 0.0 }; _size_elements];
-            let slice = _device.htod_sync_copy(&zeros)
-                .map_err(|e| MahoutError::MemoryAllocation(
-                    format!("Failed to allocate {} bytes of GPU memory (qubits={}): {:?}",
-                            _size_elements * std::mem::size_of::<CuDoubleComplex>(),
-                            qubits,
-                            e)
-                ))?;
+            // Use uninitialized allocation to avoid memory bandwidth waste.
+            let slice = unsafe {
+                _device.alloc::<CuDoubleComplex>(_size_elements)
+            }.map_err(|e| MahoutError::MemoryAllocation(
+                format!("Failed to allocate {} bytes of GPU memory (qubits={}): {:?}",
+                        _size_elements * std::mem::size_of::<CuDoubleComplex>(),
+                        qubits,
+                        e)
+            ))?;
 
             Ok(Self {
                 buffer: Arc::new(GpuBufferRaw { slice }),

Reply via email to