(mahout) branch main updated: [QDP] Add angle encoding (#852)

guanmingchiu Tue, 20 Jan 2026 07:33:20 -0800

This is an automated email from the ASF dual-hosted git repository.

guanmingchiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git



The following commit(s) were added to refs/heads/main by this push:
     new 723f04535 [QDP] Add angle encoding (#852)
723f04535 is described below

commit 723f04535c896b7bb7613405d2f5c916a27e6e81
Author: Jie-Kai Chang <[email protected]>
AuthorDate: Tue Jan 20 23:31:22 2026 +0800

    [QDP] Add angle encoding (#852)
    
    * Add Angle encoding
    
    Signed-off-by: 400Ping <[email protected]>
    
    * update
    
    Signed-off-by: 400Ping <[email protected]>
    
    * fix pre-commit error
    
    Signed-off-by: 400Ping <[email protected]>
    
    * fix
    
    Signed-off-by: 400Ping <[email protected]>
    
    ---------
    
    Signed-off-by: 400Ping <[email protected]>
---
 qdp/qdp-core/src/gpu/encodings/angle.rs | 212 ++++++++++++++++++++++++++++++--
 qdp/qdp-kernels/build.rs                |   2 +
 qdp/qdp-kernels/src/angle.cu            | 154 +++++++++++++++++++++++
 qdp/qdp-kernels/src/lib.rs              |  52 +++++++-
 testing/qdp/test_bindings.py            |  96 +++++++++++++++
 5 files changed, 506 insertions(+), 10 deletions(-)

diff --git a/qdp/qdp-core/src/gpu/encodings/angle.rs b/qdp/qdp-core/src/gpu/encodings/angle.rs
index d35dfec54..aa4a347a0 100644
--- a/qdp/qdp-core/src/gpu/encodings/angle.rs
+++ b/qdp/qdp-core/src/gpu/encodings/angle.rs
@@ -14,30 +14,224 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// Angle encoding (placeholder)
-// TODO: Rotation-based encoding via tensor product
+// Angle encoding: map per-qubit angles to product state amplitudes.
 
 use super::QuantumEncoder;
+#[cfg(target_os = "linux")]
+use crate::error::cuda_error_to_string;
 use crate::error::{MahoutError, Result};
 use crate::gpu::memory::GpuStateVector;
 use cudarc::driver::CudaDevice;
 use std::sync::Arc;
 
-/// Angle encoding (not implemented)
-/// TODO: Use sin/cos for rotation-based states
+#[cfg(target_os = "linux")]
+use crate::gpu::memory::map_allocation_error;
+#[cfg(target_os = "linux")]
+use cudarc::driver::DevicePtr;
+#[cfg(target_os = "linux")]
+use std::ffi::c_void;
+
+/// Angle encoding: each qubit uses one rotation angle to form a product state.
 pub struct AngleEncoder;
 
 impl QuantumEncoder for AngleEncoder {
     fn encode(
         &self,
-        _device: &Arc<CudaDevice>,
+        #[cfg(target_os = "linux")] device: &Arc<CudaDevice>,
+        #[cfg(not(target_os = "linux"))] _device: &Arc<CudaDevice>,
         data: &[f64],
         num_qubits: usize,
     ) -> Result<GpuStateVector> {
         self.validate_input(data, num_qubits)?;
-        Err(MahoutError::InvalidInput(
-            "Angle encoding not yet implemented. Use 'amplitude' encoding for now.".to_string(),
-        ))
+        let state_len = 1 << num_qubits;
+
+        #[cfg(target_os = "linux")]
+        {
+            let input_bytes = std::mem::size_of_val(data);
+            let angles_gpu = {
+                crate::profile_scope!("GPU::H2D_Angles");
+                device.htod_sync_copy(data).map_err(|e| {
+                    map_allocation_error(input_bytes, "angle input upload", Some(num_qubits), e)
+                })?
+            };
+
+            let state_vector = {
+                crate::profile_scope!("GPU::Alloc");
+                GpuStateVector::new(device, num_qubits)?
+            };
+
+            let state_ptr = state_vector.ptr_f64().ok_or_else(|| {
+                MahoutError::InvalidInput(
+                    "State vector precision mismatch (expected float64 buffer)".to_string(),
+                )
+            })?;
+
+            let ret = {
+                crate::profile_scope!("GPU::KernelLaunch");
+                unsafe {
+                    qdp_kernels::launch_angle_encode(
+                        *angles_gpu.device_ptr() as *const f64,
+                        state_ptr as *mut c_void,
+                        state_len,
+                        num_qubits as u32,
+                        std::ptr::null_mut(),
+                    )
+                }
+            };
+
+            if ret != 0 {
+                return Err(MahoutError::KernelLaunch(format!(
+                    "Angle encoding kernel failed with CUDA error code: {} ({})",
+                    ret,
+                    cuda_error_to_string(ret)
+                )));
+            }
+
+            {
+                crate::profile_scope!("GPU::Synchronize");
+                device.synchronize().map_err(|e| {
+                    MahoutError::Cuda(format!("CUDA device synchronize failed: {:?}", e))
+                })?;
+            }
+
+            Ok(state_vector)
+        }
+
+        #[cfg(not(target_os = "linux"))]
+        {
+            Err(MahoutError::Cuda(
+                "CUDA unavailable (non-Linux stub)".to_string(),
+            ))
+        }
+    }
+
+    /// Encode multiple angle samples in a single GPU allocation and kernel launch
+    #[cfg(target_os = "linux")]
+    fn encode_batch(
+        &self,
+        device: &Arc<CudaDevice>,
+        batch_data: &[f64],
+        num_samples: usize,
+        sample_size: usize,
+        num_qubits: usize,
+    ) -> Result<GpuStateVector> {
+        crate::profile_scope!("AngleEncoder::encode_batch");
+
+        if sample_size != num_qubits {
+            return Err(MahoutError::InvalidInput(format!(
+                "Angle encoding expects sample_size={} (one angle per qubit), got {}",
+                num_qubits, sample_size
+            )));
+        }
+
+        if batch_data.len() != num_samples * sample_size {
+            return Err(MahoutError::InvalidInput(format!(
+                "Batch data length {} doesn't match num_samples {} * sample_size {}",
+                batch_data.len(),
+                num_samples,
+                sample_size
+            )));
+        }
+
+        if num_qubits == 0 || num_qubits > 30 {
+            return Err(MahoutError::InvalidInput(format!(
+                "Number of qubits {} must be between 1 and 30",
+                num_qubits
+            )));
+        }
+
+        for (i, &val) in batch_data.iter().enumerate() {
+            if !val.is_finite() {
+                let sample_idx = i / sample_size;
+                let angle_idx = i % sample_size;
+                return Err(MahoutError::InvalidInput(format!(
+                    "Sample {} angle {} must be finite, got {}",
+                    sample_idx, angle_idx, val
+                )));
+            }
+        }
+
+        let state_len = 1 << num_qubits;
+
+        let batch_state_vector = {
+            crate::profile_scope!("GPU::AllocBatch");
+            GpuStateVector::new_batch(device, num_samples, num_qubits)?
+        };
+
+        let input_bytes = std::mem::size_of_val(batch_data);
+        let angles_gpu = {
+            crate::profile_scope!("GPU::H2D_BatchAngles");
+            device.htod_sync_copy(batch_data).map_err(|e| {
+                map_allocation_error(input_bytes, "angle batch upload", Some(num_qubits), e)
+            })?
+        };
+
+        let state_ptr = batch_state_vector.ptr_f64().ok_or_else(|| {
+            MahoutError::InvalidInput(
+                "Batch state vector precision mismatch (expected float64 buffer)".to_string(),
+            )
+        })?;
+
+        {
+            crate::profile_scope!("GPU::BatchKernelLaunch");
+            let ret = unsafe {
+                qdp_kernels::launch_angle_encode_batch(
+                    *angles_gpu.device_ptr() as *const f64,
+                    state_ptr as *mut c_void,
+                    num_samples,
+                    state_len,
+                    num_qubits as u32,
+                    std::ptr::null_mut(),
+                )
+            };
+
+            if ret != 0 {
+                return Err(MahoutError::KernelLaunch(format!(
+                    "Batch angle encoding kernel failed: {} ({})",
+                    ret,
+                    cuda_error_to_string(ret)
+                )));
+            }
+        }
+
+        {
+            crate::profile_scope!("GPU::Synchronize");
+            device
+                .synchronize()
+                .map_err(|e| MahoutError::Cuda(format!("Sync failed: {:?}", e)))?;
+        }
+
+        Ok(batch_state_vector)
+    }
+
+    fn validate_input(&self, data: &[f64], num_qubits: usize) -> Result<()> {
+        if num_qubits == 0 {
+            return Err(MahoutError::InvalidInput(
+                "Number of qubits must be at least 1".to_string(),
+            ));
+        }
+        if num_qubits > 30 {
+            return Err(MahoutError::InvalidInput(format!(
+                "Number of qubits {} exceeds practical limit of 30",
+                num_qubits
+            )));
+        }
+        if data.len() != num_qubits {
+            return Err(MahoutError::InvalidInput(format!(
+                "Angle encoding expects {} values (one per qubit), got {}",
+                num_qubits,
+                data.len()
+            )));
+        }
+        for (i, &val) in data.iter().enumerate() {
+            if !val.is_finite() {
+                return Err(MahoutError::InvalidInput(format!(
+                    "Angle at index {} must be finite, got {}",
+                    i, val
+                )));
+            }
+        }
+        Ok(())
     }
 
     fn name(&self) -> &'static str {
@@ -45,6 +239,6 @@ impl QuantumEncoder for AngleEncoder {
     }
 
     fn description(&self) -> &'static str {
-        "Angle encoding (not implemented)"
+        "Angle encoding: per-qubit rotations into a product state"
     }
 }
diff --git a/qdp/qdp-kernels/build.rs b/qdp/qdp-kernels/build.rs
index c845580a9..0c21ec6fd 100644
--- a/qdp/qdp-kernels/build.rs
+++ b/qdp/qdp-kernels/build.rs
@@ -30,6 +30,7 @@ fn main() {
     // Tell Cargo to rerun this script if the kernel sources change
     println!("cargo:rerun-if-changed=src/amplitude.cu");
     println!("cargo:rerun-if-changed=src/basis.cu");
+    println!("cargo:rerun-if-changed=src/angle.cu");
 
     // Check if CUDA is available by looking for nvcc
     let has_cuda = Command::new("nvcc").arg("--version").output().is_ok();
@@ -83,5 +84,6 @@ fn main() {
         // .flag("arch=compute_89,code=sm_89")
         .file("src/amplitude.cu")
         .file("src/basis.cu")
+        .file("src/angle.cu")
         .compile("kernels");
 }
diff --git a/qdp/qdp-kernels/src/angle.cu b/qdp/qdp-kernels/src/angle.cu
new file mode 100644
index 000000000..bf7de6e61
--- /dev/null
+++ b/qdp/qdp-kernels/src/angle.cu
@@ -0,0 +1,154 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Angle Encoding CUDA Kernels
+//
+// For each qubit angle x_k, build a product state:
+// |psi(x)> = ⊗_k (cos(x_k)|0> + sin(x_k)|1>)
+
+#include <cuda_runtime.h>
+#include <cuComplex.h>
+#include <math.h>
+
+__global__ void angle_encode_kernel(
+    const double* __restrict__ angles,
+    cuDoubleComplex* __restrict__ state,
+    size_t state_len,
+    unsigned int num_qubits
+) {
+    size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (idx >= state_len) return;
+
+    double amplitude = 1.0;
+    for (unsigned int bit = 0; bit < num_qubits; ++bit) {
+        double angle = angles[bit];
+        amplitude *= ((idx >> bit) & 1U) ? sin(angle) : cos(angle);
+    }
+
+    state[idx] = make_cuDoubleComplex(amplitude, 0.0);
+}
+
+__global__ void angle_encode_batch_kernel(
+    const double* __restrict__ angles_batch,
+    cuDoubleComplex* __restrict__ state_batch,
+    size_t num_samples,
+    size_t state_len,
+    unsigned int num_qubits
+) {
+    const size_t total_elements = num_samples * state_len;
+    const size_t stride = gridDim.x * blockDim.x;
+    const size_t state_mask = state_len - 1;
+
+    for (size_t global_idx = blockIdx.x * blockDim.x + threadIdx.x;
+         global_idx < total_elements;
+         global_idx += stride) {
+        const size_t sample_idx = global_idx >> num_qubits;
+        const size_t element_idx = global_idx & state_mask;
+        const double* angles = angles_batch + sample_idx * num_qubits;
+
+        double amplitude = 1.0;
+        for (unsigned int bit = 0; bit < num_qubits; ++bit) {
+            double angle = angles[bit];
+            amplitude *= ((element_idx >> bit) & 1U) ? sin(angle) : cos(angle);
+        }
+
+        state_batch[global_idx] = make_cuDoubleComplex(amplitude, 0.0);
+    }
+}
+
+extern "C" {
+
+/// Launch angle encoding kernel
+///
+/// # Arguments
+/// * angles_d - Device pointer to per-qubit angles
+/// * state_d - Device pointer to output state vector
+/// * state_len - Target state vector size (2^num_qubits)
+/// * num_qubits - Number of qubits (angles length)
+/// * stream - CUDA stream for async execution (nullptr = default stream)
+///
+/// # Returns
+/// CUDA error code (0 = cudaSuccess)
+int launch_angle_encode(
+    const double* angles_d,
+    void* state_d,
+    size_t state_len,
+    unsigned int num_qubits,
+    cudaStream_t stream
+) {
+    if (state_len == 0 || num_qubits == 0) {
+        return cudaErrorInvalidValue;
+    }
+
+    cuDoubleComplex* state_complex_d = static_cast<cuDoubleComplex*>(state_d);
+
+    const int blockSize = 256;
+    const int gridSize = (state_len + blockSize - 1) / blockSize;
+
+    angle_encode_kernel<<<gridSize, blockSize, 0, stream>>>(
+        angles_d,
+        state_complex_d,
+        state_len,
+        num_qubits
+    );
+
+    return (int)cudaGetLastError();
+}
+
+/// Launch batch angle encoding kernel
+///
+/// # Arguments
+/// * angles_batch_d - Device pointer to batch angles (num_samples * num_qubits)
+/// * state_batch_d - Device pointer to output batch state vectors
+/// * num_samples - Number of samples in batch
+/// * state_len - State vector size per sample (2^num_qubits)
+/// * num_qubits - Number of qubits (angles length)
+/// * stream - CUDA stream for async execution
+///
+/// # Returns
+/// CUDA error code (0 = cudaSuccess)
+int launch_angle_encode_batch(
+    const double* angles_batch_d,
+    void* state_batch_d,
+    size_t num_samples,
+    size_t state_len,
+    unsigned int num_qubits,
+    cudaStream_t stream
+) {
+    if (num_samples == 0 || state_len == 0 || num_qubits == 0) {
+        return cudaErrorInvalidValue;
+    }
+
+    cuDoubleComplex* state_complex_d = static_cast<cuDoubleComplex*>(state_batch_d);
+
+    const int blockSize = 256;
+    const size_t total_elements = num_samples * state_len;
+    const size_t blocks_needed = (total_elements + blockSize - 1) / blockSize;
+    const size_t max_blocks = 2048;
+    const size_t gridSize = (blocks_needed < max_blocks) ? blocks_needed : max_blocks;
+
+    angle_encode_batch_kernel<<<gridSize, blockSize, 0, stream>>>(
+        angles_batch_d,
+        state_complex_d,
+        num_samples,
+        state_len,
+        num_qubits
+    );
+
+    return (int)cudaGetLastError();
+}
+
+} // extern "C"
diff --git a/qdp/qdp-kernels/src/lib.rs b/qdp/qdp-kernels/src/lib.rs
index 536dc1df1..f1ebd351c 100644
--- a/qdp/qdp-kernels/src/lib.rs
+++ b/qdp/qdp-kernels/src/lib.rs
@@ -162,7 +162,32 @@ unsafe extern "C" {
         stream: *mut c_void,
     ) -> i32;
 
-    // TODO: launch_angle_encode
+    /// Launch angle encoding kernel
+    /// Returns CUDA error code (0 = success)
+    ///
+    /// # Safety
+    /// Requires valid GPU pointers, must sync before freeing
+    pub fn launch_angle_encode(
+        angles_d: *const f64,
+        state_d: *mut c_void,
+        state_len: usize,
+        num_qubits: u32,
+        stream: *mut c_void,
+    ) -> i32;
+
+    /// Launch batch angle encoding kernel
+    /// Returns CUDA error code (0 = success)
+    ///
+    /// # Safety
+    /// Requires valid GPU pointers, must sync before freeing
+    pub fn launch_angle_encode_batch(
+        angles_batch_d: *const f64,
+        state_batch_d: *mut c_void,
+        num_samples: usize,
+        state_len: usize,
+        num_qubits: u32,
+        stream: *mut c_void,
+    ) -> i32;
 }
 
 // Dummy implementation for non-Linux (allows compilation)
@@ -249,3 +274,28 @@ pub extern "C" fn launch_basis_encode_batch(
 ) -> i32 {
     999
 }
+
+#[cfg(not(target_os = "linux"))]
+#[unsafe(no_mangle)]
+pub extern "C" fn launch_angle_encode(
+    _angles_d: *const f64,
+    _state_d: *mut c_void,
+    _state_len: usize,
+    _num_qubits: u32,
+    _stream: *mut c_void,
+) -> i32 {
+    999
+}
+
+#[cfg(not(target_os = "linux"))]
+#[unsafe(no_mangle)]
+pub extern "C" fn launch_angle_encode_batch(
+    _angles_batch_d: *const f64,
+    _state_batch_d: *mut c_void,
+    _num_samples: usize,
+    _state_len: usize,
+    _num_qubits: u32,
+    _stream: *mut c_void,
+) -> i32 {
+    999
+}
diff --git a/testing/qdp/test_bindings.py b/testing/qdp/test_bindings.py
index 9c5acfd56..64bf09727 100644
--- a/testing/qdp/test_bindings.py
+++ b/testing/qdp/test_bindings.py
@@ -364,3 +364,99 @@ def test_basis_encode_errors():
     # Test multiple values (basis expects exactly 1)
     with pytest.raises(RuntimeError, match="expects exactly 1"):
         engine.encode([0.0, 1.0], 2, "basis")
+
+
[email protected]
+def test_angle_encode_basic():
+    """Test basic angle encoding (requires GPU)."""
+    pytest.importorskip("torch")
+    import torch
+    from _qdp import QdpEngine
+
+    if not torch.cuda.is_available():
+        pytest.skip("GPU required for QdpEngine")
+
+    engine = QdpEngine(0)
+
+    # Angles [0, 0] should map to |00> with amplitude 1 at index 0.
+    qtensor = engine.encode([0.0, 0.0], 2, "angle")
+    torch_tensor = torch.from_dlpack(qtensor)
+
+    assert torch_tensor.is_cuda
+    assert torch_tensor.shape == (1, 4)
+
+    expected = torch.tensor([[1.0 + 0j, 0.0 + 0j, 0.0 + 0j, 0.0 + 0j]], device="cuda:0")
+    assert torch.allclose(torch_tensor, expected.to(torch_tensor.dtype))
+
+
[email protected]
+def test_angle_encode_nonzero_angles():
+    """Test angle encoding with non-zero angles (requires GPU)."""
+    pytest.importorskip("torch")
+    import torch
+    from _qdp import QdpEngine
+
+    if not torch.cuda.is_available():
+        pytest.skip("GPU required for QdpEngine")
+
+    engine = QdpEngine(0)
+
+    angles = [torch.pi / 2, 0.0]
+    qtensor = engine.encode(angles, 2, "angle")
+    torch_tensor = torch.from_dlpack(qtensor)
+
+    expected = torch.tensor([[0.0 + 0j, 1.0 + 0j, 0.0 + 0j, 0.0 + 0j]], device="cuda:0")
+    assert torch.allclose(
+        torch_tensor, expected.to(torch_tensor.dtype), atol=1e-6, rtol=1e-6
+    )
+
+
[email protected]
+def test_angle_encode_batch():
+    """Test batch angle encoding (requires GPU)."""
+    pytest.importorskip("torch")
+    import torch
+    from _qdp import QdpEngine
+
+    if not torch.cuda.is_available():
+        pytest.skip("GPU required for QdpEngine")
+
+    engine = QdpEngine(0)
+
+    data = torch.tensor([[0.0, 0.0], [torch.pi / 2, 0.0]], dtype=torch.float64)
+    qtensor = engine.encode(data, 2, "angle")
+    torch_tensor = torch.from_dlpack(qtensor)
+
+    assert torch_tensor.shape == (2, 4)
+
+    expected = torch.tensor(
+        [
+            [1.0 + 0j, 0.0 + 0j, 0.0 + 0j, 0.0 + 0j],
+            [0.0 + 0j, 1.0 + 0j, 0.0 + 0j, 0.0 + 0j],
+        ],
+        device="cuda:0",
+    )
+    assert torch.allclose(
+        torch_tensor, expected.to(torch_tensor.dtype), atol=1e-6, rtol=1e-6
+    )
+
+
[email protected]
+def test_angle_encode_errors():
+    """Test error handling for angle encoding (requires GPU)."""
+    pytest.importorskip("torch")
+    import torch
+    from _qdp import QdpEngine
+
+    if not torch.cuda.is_available():
+        pytest.skip("GPU required for QdpEngine")
+
+    engine = QdpEngine(0)
+
+    # Wrong length (expects one angle per qubit)
+    with pytest.raises(RuntimeError, match="expects 2 values"):
+        engine.encode([0.0], 2, "angle")
+
+    # Non-finite angle
+    with pytest.raises(RuntimeError, match="must be finite"):
+        engine.encode([float("nan"), 0.0], 2, "angle")

(mahout) branch main updated: [QDP] Add angle encoding (#852)

Reply via email to