This is an automated email from the ASF dual-hosted git repository.
guanmingchiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git
The following commit(s) were added to refs/heads/main by this push:
new 723f04535 [QDP] Add angle encoding (#852)
723f04535 is described below
commit 723f04535c896b7bb7613405d2f5c916a27e6e81
Author: Jie-Kai Chang <[email protected]>
AuthorDate: Tue Jan 20 23:31:22 2026 +0800
[QDP] Add angle encoding (#852)
* Add Angle encoding
Signed-off-by: 400Ping <[email protected]>
* update
Signed-off-by: 400Ping <[email protected]>
* fix pre-commit error
Signed-off-by: 400Ping <[email protected]>
* fix
Signed-off-by: 400Ping <[email protected]>
---------
Signed-off-by: 400Ping <[email protected]>
---
qdp/qdp-core/src/gpu/encodings/angle.rs | 212 ++++++++++++++++++++++++++++++--
qdp/qdp-kernels/build.rs | 2 +
qdp/qdp-kernels/src/angle.cu | 154 +++++++++++++++++++++++
qdp/qdp-kernels/src/lib.rs | 52 +++++++-
testing/qdp/test_bindings.py | 96 +++++++++++++++
5 files changed, 506 insertions(+), 10 deletions(-)
diff --git a/qdp/qdp-core/src/gpu/encodings/angle.rs b/qdp/qdp-core/src/gpu/encodings/angle.rs
index d35dfec54..aa4a347a0 100644
--- a/qdp/qdp-core/src/gpu/encodings/angle.rs
+++ b/qdp/qdp-core/src/gpu/encodings/angle.rs
@@ -14,30 +14,224 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// Angle encoding (placeholder)
-// TODO: Rotation-based encoding via tensor product
+// Angle encoding: map per-qubit angles to product state amplitudes.
use super::QuantumEncoder;
+#[cfg(target_os = "linux")]
+use crate::error::cuda_error_to_string;
use crate::error::{MahoutError, Result};
use crate::gpu::memory::GpuStateVector;
use cudarc::driver::CudaDevice;
use std::sync::Arc;
-/// Angle encoding (not implemented)
-/// TODO: Use sin/cos for rotation-based states
+#[cfg(target_os = "linux")]
+use crate::gpu::memory::map_allocation_error;
+#[cfg(target_os = "linux")]
+use cudarc::driver::DevicePtr;
+#[cfg(target_os = "linux")]
+use std::ffi::c_void;
+
+/// Angle encoding: each qubit uses one rotation angle to form a product state.
pub struct AngleEncoder;
impl QuantumEncoder for AngleEncoder {
fn encode(
&self,
- _device: &Arc<CudaDevice>,
+ #[cfg(target_os = "linux")] device: &Arc<CudaDevice>,
+ #[cfg(not(target_os = "linux"))] _device: &Arc<CudaDevice>,
data: &[f64],
num_qubits: usize,
) -> Result<GpuStateVector> {
self.validate_input(data, num_qubits)?;
- Err(MahoutError::InvalidInput(
- "Angle encoding not yet implemented. Use 'amplitude' encoding for now.".to_string(),
- ))
+ let state_len = 1 << num_qubits;
+
+ #[cfg(target_os = "linux")]
+ {
+ let input_bytes = std::mem::size_of_val(data);
+ let angles_gpu = {
+ crate::profile_scope!("GPU::H2D_Angles");
+ device.htod_sync_copy(data).map_err(|e| {
+ map_allocation_error(input_bytes, "angle input upload", Some(num_qubits), e)
+ })?
+ };
+
+ let state_vector = {
+ crate::profile_scope!("GPU::Alloc");
+ GpuStateVector::new(device, num_qubits)?
+ };
+
+ let state_ptr = state_vector.ptr_f64().ok_or_else(|| {
+ MahoutError::InvalidInput(
+ "State vector precision mismatch (expected float64 buffer)".to_string(),
+ )
+ })?;
+
+ let ret = {
+ crate::profile_scope!("GPU::KernelLaunch");
+ unsafe {
+ qdp_kernels::launch_angle_encode(
+ *angles_gpu.device_ptr() as *const f64,
+ state_ptr as *mut c_void,
+ state_len,
+ num_qubits as u32,
+ std::ptr::null_mut(),
+ )
+ }
+ };
+
+ if ret != 0 {
+ return Err(MahoutError::KernelLaunch(format!(
+ "Angle encoding kernel failed with CUDA error code: {} ({})",
+ ret,
+ cuda_error_to_string(ret)
+ )));
+ }
+
+ {
+ crate::profile_scope!("GPU::Synchronize");
+ device.synchronize().map_err(|e| {
+ MahoutError::Cuda(format!("CUDA device synchronize failed: {:?}", e))
+ })?;
+ }
+
+ Ok(state_vector)
+ }
+
+ #[cfg(not(target_os = "linux"))]
+ {
+ Err(MahoutError::Cuda(
+ "CUDA unavailable (non-Linux stub)".to_string(),
+ ))
+ }
+ }
+
+ /// Encode multiple angle samples in a single GPU allocation and kernel launch
+ #[cfg(target_os = "linux")]
+ fn encode_batch(
+ &self,
+ device: &Arc<CudaDevice>,
+ batch_data: &[f64],
+ num_samples: usize,
+ sample_size: usize,
+ num_qubits: usize,
+ ) -> Result<GpuStateVector> {
+ crate::profile_scope!("AngleEncoder::encode_batch");
+
+ if sample_size != num_qubits {
+ return Err(MahoutError::InvalidInput(format!(
+ "Angle encoding expects sample_size={} (one angle per qubit), got {}",
+ num_qubits, sample_size
+ )));
+ }
+
+ if batch_data.len() != num_samples * sample_size {
+ return Err(MahoutError::InvalidInput(format!(
+ "Batch data length {} doesn't match num_samples {} * sample_size {}",
+ batch_data.len(),
+ num_samples,
+ sample_size
+ )));
+ }
+
+ if num_qubits == 0 || num_qubits > 30 {
+ return Err(MahoutError::InvalidInput(format!(
+ "Number of qubits {} must be between 1 and 30",
+ num_qubits
+ )));
+ }
+
+ for (i, &val) in batch_data.iter().enumerate() {
+ if !val.is_finite() {
+ let sample_idx = i / sample_size;
+ let angle_idx = i % sample_size;
+ return Err(MahoutError::InvalidInput(format!(
+ "Sample {} angle {} must be finite, got {}",
+ sample_idx, angle_idx, val
+ )));
+ }
+ }
+
+ let state_len = 1 << num_qubits;
+
+ let batch_state_vector = {
+ crate::profile_scope!("GPU::AllocBatch");
+ GpuStateVector::new_batch(device, num_samples, num_qubits)?
+ };
+
+ let input_bytes = std::mem::size_of_val(batch_data);
+ let angles_gpu = {
+ crate::profile_scope!("GPU::H2D_BatchAngles");
+ device.htod_sync_copy(batch_data).map_err(|e| {
+ map_allocation_error(input_bytes, "angle batch upload", Some(num_qubits), e)
+ })?
+ };
+
+ let state_ptr = batch_state_vector.ptr_f64().ok_or_else(|| {
+ MahoutError::InvalidInput(
+ "Batch state vector precision mismatch (expected float64 buffer)".to_string(),
+ )
+ })?;
+
+ {
+ crate::profile_scope!("GPU::BatchKernelLaunch");
+ let ret = unsafe {
+ qdp_kernels::launch_angle_encode_batch(
+ *angles_gpu.device_ptr() as *const f64,
+ state_ptr as *mut c_void,
+ num_samples,
+ state_len,
+ num_qubits as u32,
+ std::ptr::null_mut(),
+ )
+ };
+
+ if ret != 0 {
+ return Err(MahoutError::KernelLaunch(format!(
+ "Batch angle encoding kernel failed: {} ({})",
+ ret,
+ cuda_error_to_string(ret)
+ )));
+ }
+ }
+
+ {
+ crate::profile_scope!("GPU::Synchronize");
+ device
+ .synchronize()
+ .map_err(|e| MahoutError::Cuda(format!("Sync failed: {:?}", e)))?;
+ }
+
+ Ok(batch_state_vector)
+ }
+
+ fn validate_input(&self, data: &[f64], num_qubits: usize) -> Result<()> {
+ if num_qubits == 0 {
+ return Err(MahoutError::InvalidInput(
+ "Number of qubits must be at least 1".to_string(),
+ ));
+ }
+ if num_qubits > 30 {
+ return Err(MahoutError::InvalidInput(format!(
+ "Number of qubits {} exceeds practical limit of 30",
+ num_qubits
+ )));
+ }
+ if data.len() != num_qubits {
+ return Err(MahoutError::InvalidInput(format!(
+ "Angle encoding expects {} values (one per qubit), got {}",
+ num_qubits,
+ data.len()
+ )));
+ }
+ for (i, &val) in data.iter().enumerate() {
+ if !val.is_finite() {
+ return Err(MahoutError::InvalidInput(format!(
+ "Angle at index {} must be finite, got {}",
+ i, val
+ )));
+ }
+ }
+ Ok(())
}
fn name(&self) -> &'static str {
@@ -45,6 +239,6 @@ impl QuantumEncoder for AngleEncoder {
}
fn description(&self) -> &'static str {
- "Angle encoding (not implemented)"
+ "Angle encoding: per-qubit rotations into a product state"
}
}
diff --git a/qdp/qdp-kernels/build.rs b/qdp/qdp-kernels/build.rs
index c845580a9..0c21ec6fd 100644
--- a/qdp/qdp-kernels/build.rs
+++ b/qdp/qdp-kernels/build.rs
@@ -30,6 +30,7 @@ fn main() {
// Tell Cargo to rerun this script if the kernel sources change
println!("cargo:rerun-if-changed=src/amplitude.cu");
println!("cargo:rerun-if-changed=src/basis.cu");
+ println!("cargo:rerun-if-changed=src/angle.cu");
// Check if CUDA is available by looking for nvcc
let has_cuda = Command::new("nvcc").arg("--version").output().is_ok();
@@ -83,5 +84,6 @@ fn main() {
// .flag("arch=compute_89,code=sm_89")
.file("src/amplitude.cu")
.file("src/basis.cu")
+ .file("src/angle.cu")
.compile("kernels");
}
diff --git a/qdp/qdp-kernels/src/angle.cu b/qdp/qdp-kernels/src/angle.cu
new file mode 100644
index 000000000..bf7de6e61
--- /dev/null
+++ b/qdp/qdp-kernels/src/angle.cu
@@ -0,0 +1,154 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Angle Encoding CUDA Kernels
+//
+// For each qubit angle x_k, build a product state:
+// |psi(x)> = ⊗_k (cos(x_k)|0> + sin(x_k)|1>)
+
+#include <cuda_runtime.h>
+#include <cuComplex.h>
+#include <math.h>
+
+__global__ void angle_encode_kernel(
+ const double* __restrict__ angles,
+ cuDoubleComplex* __restrict__ state,
+ size_t state_len,
+ unsigned int num_qubits
+) {
+ size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
+ if (idx >= state_len) return;
+
+ double amplitude = 1.0;
+ for (unsigned int bit = 0; bit < num_qubits; ++bit) {
+ double angle = angles[bit];
+ amplitude *= ((idx >> bit) & 1U) ? sin(angle) : cos(angle);
+ }
+
+ state[idx] = make_cuDoubleComplex(amplitude, 0.0);
+}
+
+__global__ void angle_encode_batch_kernel(
+ const double* __restrict__ angles_batch,
+ cuDoubleComplex* __restrict__ state_batch,
+ size_t num_samples,
+ size_t state_len,
+ unsigned int num_qubits
+) {
+ const size_t total_elements = num_samples * state_len;
+ const size_t stride = gridDim.x * blockDim.x;
+ const size_t state_mask = state_len - 1;
+
+ for (size_t global_idx = blockIdx.x * blockDim.x + threadIdx.x;
+ global_idx < total_elements;
+ global_idx += stride) {
+ const size_t sample_idx = global_idx >> num_qubits;
+ const size_t element_idx = global_idx & state_mask;
+ const double* angles = angles_batch + sample_idx * num_qubits;
+
+ double amplitude = 1.0;
+ for (unsigned int bit = 0; bit < num_qubits; ++bit) {
+ double angle = angles[bit];
+ amplitude *= ((element_idx >> bit) & 1U) ? sin(angle) : cos(angle);
+ }
+
+ state_batch[global_idx] = make_cuDoubleComplex(amplitude, 0.0);
+ }
+}
+
+extern "C" {
+
+/// Launch angle encoding kernel
+///
+/// # Arguments
+/// * angles_d - Device pointer to per-qubit angles
+/// * state_d - Device pointer to output state vector
+/// * state_len - Target state vector size (2^num_qubits)
+/// * num_qubits - Number of qubits (angles length)
+/// * stream - CUDA stream for async execution (nullptr = default stream)
+///
+/// # Returns
+/// CUDA error code (0 = cudaSuccess)
+int launch_angle_encode(
+ const double* angles_d,
+ void* state_d,
+ size_t state_len,
+ unsigned int num_qubits,
+ cudaStream_t stream
+) {
+ if (state_len == 0 || num_qubits == 0) {
+ return cudaErrorInvalidValue;
+ }
+
+ cuDoubleComplex* state_complex_d = static_cast<cuDoubleComplex*>(state_d);
+
+ const int blockSize = 256;
+ const int gridSize = (state_len + blockSize - 1) / blockSize;
+
+ angle_encode_kernel<<<gridSize, blockSize, 0, stream>>>(
+ angles_d,
+ state_complex_d,
+ state_len,
+ num_qubits
+ );
+
+ return (int)cudaGetLastError();
+}
+
+/// Launch batch angle encoding kernel
+///
+/// # Arguments
+/// * angles_batch_d - Device pointer to batch angles (num_samples * num_qubits)
+/// * state_batch_d - Device pointer to output batch state vectors
+/// * num_samples - Number of samples in batch
+/// * state_len - State vector size per sample (2^num_qubits)
+/// * num_qubits - Number of qubits (angles length)
+/// * stream - CUDA stream for async execution
+///
+/// # Returns
+/// CUDA error code (0 = cudaSuccess)
+int launch_angle_encode_batch(
+ const double* angles_batch_d,
+ void* state_batch_d,
+ size_t num_samples,
+ size_t state_len,
+ unsigned int num_qubits,
+ cudaStream_t stream
+) {
+ if (num_samples == 0 || state_len == 0 || num_qubits == 0) {
+ return cudaErrorInvalidValue;
+ }
+
+ cuDoubleComplex* state_complex_d = static_cast<cuDoubleComplex*>(state_batch_d);
+
+ const int blockSize = 256;
+ const size_t total_elements = num_samples * state_len;
+ const size_t blocks_needed = (total_elements + blockSize - 1) / blockSize;
+ const size_t max_blocks = 2048;
+ const size_t gridSize = (blocks_needed < max_blocks) ? blocks_needed : max_blocks;
+
+ angle_encode_batch_kernel<<<gridSize, blockSize, 0, stream>>>(
+ angles_batch_d,
+ state_complex_d,
+ num_samples,
+ state_len,
+ num_qubits
+ );
+
+ return (int)cudaGetLastError();
+}
+
+} // extern "C"
diff --git a/qdp/qdp-kernels/src/lib.rs b/qdp/qdp-kernels/src/lib.rs
index 536dc1df1..f1ebd351c 100644
--- a/qdp/qdp-kernels/src/lib.rs
+++ b/qdp/qdp-kernels/src/lib.rs
@@ -162,7 +162,32 @@ unsafe extern "C" {
stream: *mut c_void,
) -> i32;
- // TODO: launch_angle_encode
+ /// Launch angle encoding kernel
+ /// Returns CUDA error code (0 = success)
+ ///
+ /// # Safety
+ /// Requires valid GPU pointers, must sync before freeing
+ pub fn launch_angle_encode(
+ angles_d: *const f64,
+ state_d: *mut c_void,
+ state_len: usize,
+ num_qubits: u32,
+ stream: *mut c_void,
+ ) -> i32;
+
+ /// Launch batch angle encoding kernel
+ /// Returns CUDA error code (0 = success)
+ ///
+ /// # Safety
+ /// Requires valid GPU pointers, must sync before freeing
+ pub fn launch_angle_encode_batch(
+ angles_batch_d: *const f64,
+ state_batch_d: *mut c_void,
+ num_samples: usize,
+ state_len: usize,
+ num_qubits: u32,
+ stream: *mut c_void,
+ ) -> i32;
}
// Dummy implementation for non-Linux (allows compilation)
@@ -249,3 +274,28 @@ pub extern "C" fn launch_basis_encode_batch(
) -> i32 {
999
}
+
+#[cfg(not(target_os = "linux"))]
+#[unsafe(no_mangle)]
+pub extern "C" fn launch_angle_encode(
+ _angles_d: *const f64,
+ _state_d: *mut c_void,
+ _state_len: usize,
+ _num_qubits: u32,
+ _stream: *mut c_void,
+) -> i32 {
+ 999
+}
+
+#[cfg(not(target_os = "linux"))]
+#[unsafe(no_mangle)]
+pub extern "C" fn launch_angle_encode_batch(
+ _angles_batch_d: *const f64,
+ _state_batch_d: *mut c_void,
+ _num_samples: usize,
+ _state_len: usize,
+ _num_qubits: u32,
+ _stream: *mut c_void,
+) -> i32 {
+ 999
+}
diff --git a/testing/qdp/test_bindings.py b/testing/qdp/test_bindings.py
index 9c5acfd56..64bf09727 100644
--- a/testing/qdp/test_bindings.py
+++ b/testing/qdp/test_bindings.py
@@ -364,3 +364,99 @@ def test_basis_encode_errors():
# Test multiple values (basis expects exactly 1)
with pytest.raises(RuntimeError, match="expects exactly 1"):
engine.encode([0.0, 1.0], 2, "basis")
+
+
[email protected]
+def test_angle_encode_basic():
+ """Test basic angle encoding (requires GPU)."""
+ pytest.importorskip("torch")
+ import torch
+ from _qdp import QdpEngine
+
+ if not torch.cuda.is_available():
+ pytest.skip("GPU required for QdpEngine")
+
+ engine = QdpEngine(0)
+
+ # Angles [0, 0] should map to |00> with amplitude 1 at index 0.
+ qtensor = engine.encode([0.0, 0.0], 2, "angle")
+ torch_tensor = torch.from_dlpack(qtensor)
+
+ assert torch_tensor.is_cuda
+ assert torch_tensor.shape == (1, 4)
+
+ expected = torch.tensor([[1.0 + 0j, 0.0 + 0j, 0.0 + 0j, 0.0 + 0j]], device="cuda:0")
+ assert torch.allclose(torch_tensor, expected.to(torch_tensor.dtype))
+
+
[email protected]
+def test_angle_encode_nonzero_angles():
+ """Test angle encoding with non-zero angles (requires GPU)."""
+ pytest.importorskip("torch")
+ import torch
+ from _qdp import QdpEngine
+
+ if not torch.cuda.is_available():
+ pytest.skip("GPU required for QdpEngine")
+
+ engine = QdpEngine(0)
+
+ angles = [torch.pi / 2, 0.0]
+ qtensor = engine.encode(angles, 2, "angle")
+ torch_tensor = torch.from_dlpack(qtensor)
+
+ expected = torch.tensor([[0.0 + 0j, 1.0 + 0j, 0.0 + 0j, 0.0 + 0j]], device="cuda:0")
+ assert torch.allclose(
+ torch_tensor, expected.to(torch_tensor.dtype), atol=1e-6, rtol=1e-6
+ )
+
+
[email protected]
+def test_angle_encode_batch():
+ """Test batch angle encoding (requires GPU)."""
+ pytest.importorskip("torch")
+ import torch
+ from _qdp import QdpEngine
+
+ if not torch.cuda.is_available():
+ pytest.skip("GPU required for QdpEngine")
+
+ engine = QdpEngine(0)
+
+ data = torch.tensor([[0.0, 0.0], [torch.pi / 2, 0.0]], dtype=torch.float64)
+ qtensor = engine.encode(data, 2, "angle")
+ torch_tensor = torch.from_dlpack(qtensor)
+
+ assert torch_tensor.shape == (2, 4)
+
+ expected = torch.tensor(
+ [
+ [1.0 + 0j, 0.0 + 0j, 0.0 + 0j, 0.0 + 0j],
+ [0.0 + 0j, 1.0 + 0j, 0.0 + 0j, 0.0 + 0j],
+ ],
+ device="cuda:0",
+ )
+ assert torch.allclose(
+ torch_tensor, expected.to(torch_tensor.dtype), atol=1e-6, rtol=1e-6
+ )
+
+
[email protected]
+def test_angle_encode_errors():
+ """Test error handling for angle encoding (requires GPU)."""
+ pytest.importorskip("torch")
+ import torch
+ from _qdp import QdpEngine
+
+ if not torch.cuda.is_available():
+ pytest.skip("GPU required for QdpEngine")
+
+ engine = QdpEngine(0)
+
+ # Wrong length (expects one angle per qubit)
+ with pytest.raises(RuntimeError, match="expects 2 values"):
+ engine.encode([0.0], 2, "angle")
+
+ # Non-finite angle
+ with pytest.raises(RuntimeError, match="must be finite"):
+ engine.encode([float("nan"), 0.0], 2, "angle")