This is an automated email from the ASF dual-hosted git repository.
guanmingchiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git
The following commit(s) were added to refs/heads/main by this push:
new 640c160eb [QDP] Support float32 CUDA amplitude encoding in Python
bindings (#1025)
640c160eb is described below
commit 640c160eb57589725fbe8c4e50df3219609730fc
Author: Vic Wen <[email protected]>
AuthorDate: Wed Feb 18 23:42:15 2026 +0800
[QDP] Support float32 CUDA amplitude encoding in Python bindings (#1025)
* feat: add direct encoding method for float32 tensors
* fix: fix rebase error
* feat: removes redundant checks & adds notes
---
qdp/qdp-python/src/lib.rs | 215 +++++++++++++++++--------
qdp/qdp-python/tests/test_dlpack_validation.py | 33 ++--
testing/qdp/test_bindings.py | 33 +++-
3 files changed, 193 insertions(+), 88 deletions(-)
diff --git a/qdp/qdp-python/src/lib.rs b/qdp/qdp-python/src/lib.rs
index 7afea5284..12335cb72 100644
--- a/qdp/qdp-python/src/lib.rs
+++ b/qdp/qdp-python/src/lib.rs
@@ -286,7 +286,16 @@ fn validate_cuda_tensor_for_encoding(
let dtype_str: String = dtype.str()?.extract()?;
let dtype_str_lower = dtype_str.to_ascii_lowercase();
match method.as_str() {
- "amplitude" | "angle" => {
+ "amplitude" => {
+ if !(dtype_str_lower.contains("float64") ||
dtype_str_lower.contains("float32")) {
+ return Err(PyRuntimeError::new_err(format!(
+ "CUDA tensor must have dtype float64 or float32 for
amplitude encoding, got {}. \
+ Use tensor.to(torch.float64) or tensor.to(torch.float32)",
+ dtype_str
+ )));
+ }
+ }
+ "angle" => {
if !dtype_str_lower.contains("float64") {
return Err(PyRuntimeError::new_err(format!(
"CUDA tensor must have dtype float64 for {} encoding, got
{}. \
@@ -642,76 +651,7 @@ impl QdpEngine {
if is_pytorch_tensor(data)? {
// Check if it's a CUDA tensor - use zero-copy GPU encoding
if is_cuda_tensor(data)? {
- // Validate CUDA tensor for direct GPU encoding
- validate_cuda_tensor_for_encoding(
- data,
- self.engine.device().ordinal(),
- encoding_method,
- )?;
-
- // Extract GPU pointer directly from PyTorch tensor
- let tensor_info = extract_cuda_tensor_info(data)?;
- let stream_ptr = get_torch_cuda_stream_ptr(data)?;
-
- let ndim: usize = data.call_method0("dim")?.extract()?;
-
- match ndim {
- 1 => {
- // 1D CUDA tensor: single sample encoding
- let input_len = tensor_info.shape[0] as usize;
- // SAFETY: tensor_info.data_ptr was obtained via
PyTorch's data_ptr() from a
- // valid CUDA tensor. The tensor remains alive during
this call
- // (held by Python's GIL), and we validated
dtype/contiguity/device above.
- let ptr = unsafe {
- self.engine
- .encode_from_gpu_ptr_with_stream(
- tensor_info.data_ptr as *const
std::ffi::c_void,
- input_len,
- num_qubits,
- encoding_method,
- stream_ptr,
- )
- .map_err(|e| {
- PyRuntimeError::new_err(format!("Encoding
failed: {}", e))
- })?
- };
- return Ok(QuantumTensor {
- ptr,
- consumed: false,
- });
- }
- 2 => {
- // 2D CUDA tensor: batch encoding
- let num_samples = tensor_info.shape[0] as usize;
- let sample_size = tensor_info.shape[1] as usize;
- // SAFETY: Same as above - pointer from validated
PyTorch CUDA tensor
- let ptr = unsafe {
- self.engine
- .encode_batch_from_gpu_ptr_with_stream(
- tensor_info.data_ptr as *const
std::ffi::c_void,
- num_samples,
- sample_size,
- num_qubits,
- encoding_method,
- stream_ptr,
- )
- .map_err(|e| {
- PyRuntimeError::new_err(format!("Encoding
failed: {}", e))
- })?
- };
- return Ok(QuantumTensor {
- ptr,
- consumed: false,
- });
- }
- _ => {
- return Err(PyRuntimeError::new_err(format!(
- "Unsupported CUDA tensor shape: {}D. Expected 1D
tensor for single \
- sample encoding or 2D tensor (batch_size,
features) for batch encoding.",
- ndim
- )));
- }
- }
+ return self._encode_from_cuda_tensor(data, num_qubits,
encoding_method);
}
// CPU PyTorch tensor path
return self.encode_from_pytorch(data, num_qubits, encoding_method);
@@ -1149,6 +1089,139 @@ impl QdpEngine {
})?;
Ok(PyQuantumLoader::new(Some(iter)))
}
+
+ /// Encode directly from a PyTorch CUDA tensor. Internal helper.
+ ///
+ /// Dispatches to the core f32 GPU pointer API for 1D float32 amplitude
encoding,
+ /// or to the float64/basis GPU pointer APIs for other dtypes and batch
encoding.
+ ///
+ /// Args:
+ /// data: PyTorch CUDA tensor
+ /// num_qubits: Number of qubits
+ /// encoding_method: Encoding strategy (e.g. "amplitude", "angle", "basis"; the float32 fast path applies only to "amplitude")
+ fn _encode_from_cuda_tensor(
+ &self,
+ data: &Bound<'_, PyAny>,
+ num_qubits: usize,
+ encoding_method: &str,
+ ) -> PyResult<QuantumTensor> {
+ // Validate CUDA tensor for direct GPU encoding (shape, contiguity,
device, dtype)
+ validate_cuda_tensor_for_encoding(data,
self.engine.device().ordinal(), encoding_method)?;
+
+ // Determine dtype for dispatch (float32 vs float64, etc.).
+ let dtype = data.getattr("dtype")?;
+ let dtype_str: String = dtype.str()?.extract()?;
+ let dtype_str_lower = dtype_str.to_ascii_lowercase();
+ let is_f32 = dtype_str_lower.contains("float32");
+ let method = encoding_method.to_ascii_lowercase();
+
+ // Current f32 CUDA path only supports amplitude encoding for 1D
tensors.
+ let ndim: usize = data.call_method0("dim")?.extract()?;
+
+ if method.as_str() == "amplitude" && is_f32 {
+ // NOTE: This f32 fast path intentionally bypasses
`extract_cuda_tensor_info`/DLPack
+ // and uses PyTorch's `data_ptr()`/`numel()` directly, after
+ // `validate_cuda_tensor_for_encoding` has already enforced
dtype/shape/contiguity/device.
+ // If additional validation is added to `extract_cuda_tensor_info`
in the future, it must
+ // be mirrored here to keep behavior consistent.
+ match ndim {
+ 1 => {
+ // 1D CUDA tensor, float32 amplitude encoding using core
f32 GPU pointer API.
+ let input_len: usize =
data.call_method0("numel")?.extract()?;
+ let stream_ptr = get_torch_cuda_stream_ptr(data)?;
+ let data_ptr_u64: u64 =
data.call_method0("data_ptr")?.extract()?;
+ let data_ptr = data_ptr_u64 as *const f32;
+
+ let ptr = unsafe {
+ self.engine
+ .encode_from_gpu_ptr_f32_with_stream(
+ data_ptr, input_len, num_qubits, stream_ptr,
+ )
+ .map_err(|e| {
+ PyRuntimeError::new_err(format!(
+ "Encoding failed (float32 amplitude): {}",
+ e
+ ))
+ })?
+ };
+
+ Ok(QuantumTensor {
+ ptr,
+ consumed: false,
+ })
+ }
+ 2 => Err(PyRuntimeError::new_err(
+ "CUDA float32 batch amplitude encoding is not yet
supported. \
+ Use float64 (tensor.to(torch.float64)) or encode samples
individually.",
+ )),
+ _ => Err(PyRuntimeError::new_err(format!(
+ "Unsupported CUDA tensor shape: {}D. Expected 1D tensor
for single \
+ sample encoding or 2D tensor (batch_size, features) for
batch encoding.",
+ ndim
+ ))),
+ }
+ } else {
+ // Existing float64 (and basis/int64) CUDA path using direct GPU
pointer.
+ let tensor_info = extract_cuda_tensor_info(data)?;
+ let stream_ptr = get_torch_cuda_stream_ptr(data)?;
+
+ match ndim {
+ 1 => {
+ // 1D CUDA tensor: single sample encoding
+ let input_len = tensor_info.shape[0] as usize;
+ // SAFETY: tensor_info.data_ptr was obtained via PyTorch's
data_ptr() from a
+ // valid CUDA tensor. The tensor remains alive during this
call
+ // (held by Python's GIL), and we validated
dtype/contiguity/device above.
+ let ptr = unsafe {
+ self.engine
+ .encode_from_gpu_ptr_with_stream(
+ tensor_info.data_ptr as *const
std::ffi::c_void,
+ input_len,
+ num_qubits,
+ encoding_method,
+ stream_ptr,
+ )
+ .map_err(|e| {
+ PyRuntimeError::new_err(format!("Encoding
failed: {}", e))
+ })?
+ };
+ Ok(QuantumTensor {
+ ptr,
+ consumed: false,
+ })
+ }
+ 2 => {
+ // 2D CUDA tensor: batch encoding
+ let num_samples = tensor_info.shape[0] as usize;
+ let sample_size = tensor_info.shape[1] as usize;
+ // SAFETY: Same as above - pointer from validated PyTorch
CUDA tensor
+ let ptr = unsafe {
+ self.engine
+ .encode_batch_from_gpu_ptr_with_stream(
+ tensor_info.data_ptr as *const
std::ffi::c_void,
+ num_samples,
+ sample_size,
+ num_qubits,
+ encoding_method,
+ stream_ptr,
+ )
+ .map_err(|e| {
+ PyRuntimeError::new_err(format!("Encoding
failed: {}", e))
+ })?
+ };
+ Ok(QuantumTensor {
+ ptr,
+ consumed: false,
+ })
+ }
+ _ => Err(PyRuntimeError::new_err(format!(
+ "Unsupported CUDA tensor shape: {}D. Expected 1D tensor
for single \
+ sample encoding or 2D tensor (batch_size, features) for
batch encoding.",
+ ndim
+ ))),
+ }
+ }
+ }
}
// --- Loader bindings (Linux only; qdp-core pipeline types only built on
Linux) ---
diff --git a/qdp/qdp-python/tests/test_dlpack_validation.py
b/qdp/qdp-python/tests/test_dlpack_validation.py
index 9f624a606..b2e605018 100644
--- a/qdp/qdp-python/tests/test_dlpack_validation.py
+++ b/qdp/qdp-python/tests/test_dlpack_validation.py
@@ -32,23 +32,30 @@ def _engine():
@pytest.mark.skipif(not _cuda_available(), reason="CUDA not available")
-def test_dtype_validation_float32_rejected():
- """DLPack tensor must be float64; float32 CUDA tensor should fail with
clear message."""
+def test_cuda_float32_amplitude_supported():
+ """1D float32 CUDA tensor should be supported for amplitude encoding via
GPU pointer f32 path."""
engine = _engine()
# 1D float32 CUDA tensor (contiguous)
t = torch.randn(4, dtype=torch.float32, device="cuda")
- with pytest.raises(RuntimeError) as exc_info:
+ result = engine.encode(t, num_qubits=2, encoding_method="amplitude")
+ assert result is not None
+
+ # Verify DLPack round-trip works and tensor is on CUDA
+ qt = torch.from_dlpack(result)
+ assert qt.is_cuda
+ # With default engine precision=float32, complex64 is expected
+ assert qt.dtype in (torch.complex64, torch.complex128)
+
+
+@pytest.mark.skipif(not _cuda_available(), reason="CUDA not available")
+def test_cuda_float32_amplitude_2d_unsupported():
+ """2D float32 CUDA tensor with amplitude encoding should raise a clear
error."""
+ engine = _engine()
+ t = torch.randn(2, 4, dtype=torch.float32, device="cuda")
+ with pytest.raises(
+ RuntimeError, match="float32 batch amplitude encoding is not yet
supported"
+ ):
engine.encode(t, num_qubits=2, encoding_method="amplitude")
- msg = str(exc_info.value).lower()
- assert "float64" in msg
- # Accept either DLPack-style (code=/bits=/lanes=) or user-facing
(float32/dtype) message
- assert (
- "code=" in msg
- or "bits=" in msg
- or "lanes=" in msg
- or "float32" in msg
- or "dtype" in msg
- )
@pytest.mark.skipif(not _cuda_available(), reason="CUDA not available")
diff --git a/testing/qdp/test_bindings.py b/testing/qdp/test_bindings.py
index 73a553bd4..0bc971d1c 100644
--- a/testing/qdp/test_bindings.py
+++ b/testing/qdp/test_bindings.py
@@ -315,7 +315,7 @@ def test_encode_cuda_tensor(data_shape, expected_shape,
expected_batch_size):
@requires_qdp
@pytest.mark.gpu
def test_encode_cuda_tensor_wrong_dtype():
- """Test error when CUDA tensor has wrong dtype (non-float64)."""
+ """Test error when CUDA tensor has wrong dtype for amplitude (e.g.
float16)."""
pytest.importorskip("torch")
from _qdp import QdpEngine
@@ -324,9 +324,9 @@ def test_encode_cuda_tensor_wrong_dtype():
engine = QdpEngine(0)
- # Create CUDA tensor with float32 dtype (wrong)
- data = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32,
device="cuda:0")
- with pytest.raises(RuntimeError, match="CUDA tensor must have dtype
float64"):
+ # Amplitude encoding accepts float64 or float32 only; float16 is invalid
+ data = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float16,
device="cuda:0")
+ with pytest.raises(RuntimeError, match="float64 or float32"):
engine.encode(data, 2, "amplitude")
@@ -538,6 +538,31 @@ def test_encode_cuda_tensor_output_dtype(precision,
expected_dtype):
)
+@requires_qdp
+@pytest.mark.gpu
+@pytest.mark.parametrize(
+ "precision,expected_dtype",
+ [
+ ("float32", torch.complex64),
+ ("float64", torch.complex128),
+ ],
+)
+def test_encode_cuda_tensor_float32_input_output_dtype(precision,
expected_dtype):
+ """Test that 1D float32 CUDA amplitude encoding respects engine precision
(f32 path)."""
+ pytest.importorskip("torch")
+ from _qdp import QdpEngine
+
+ if not torch.cuda.is_available():
+ pytest.skip("GPU required for QdpEngine")
+
+ engine = QdpEngine(0, precision=precision)
+ data = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32,
device="cuda:0")
+ result = torch.from_dlpack(engine.encode(data, 2, "amplitude"))
+ assert result.dtype == expected_dtype, (
+ f"Expected {expected_dtype}, got {result.dtype}"
+ )
+
+
@requires_qdp
@pytest.mark.gpu
def test_basis_encode_basic():