This is an automated email from the ASF dual-hosted git repository.
hcr pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git
The following commit(s) were added to refs/heads/main by this push:
new 15d3df9d6 [QDP] Refactor qdp-python `lib.rs` (#1054)
15d3df9d6 is described below
commit 15d3df9d6f9169540804ef5e49423ec5270ff987
Author: Vic Wen <[email protected]>
AuthorDate: Sat Feb 21 20:34:29 2026 +0800
[QDP] Refactor qdp-python `lib.rs` (#1054)
* refactor: refactor qdp-python `lib.rs`
* feat: add main branch feature
---
qdp/qdp-python/src/dlpack.rs | 200 +++++
qdp/qdp-python/src/{lib.rs => engine.rs} | 847 +++----------------
qdp/qdp-python/src/lib.rs | 1302 +-----------------------------
qdp/qdp-python/src/loader.rs | 103 +++
qdp/qdp-python/src/pytorch.rs | 251 ++++++
qdp/qdp-python/src/tensor.rs | 147 ++++
6 files changed, 807 insertions(+), 2043 deletions(-)
diff --git a/qdp/qdp-python/src/dlpack.rs b/qdp/qdp-python/src/dlpack.rs
new file mode 100644
index 000000000..4a1ad08ec
--- /dev/null
+++ b/qdp/qdp-python/src/dlpack.rs
@@ -0,0 +1,200 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use pyo3::exceptions::PyRuntimeError;
+use pyo3::ffi;
+use pyo3::prelude::*;
+use qdp_core::dlpack::{DL_FLOAT, DLDeviceType, DLManagedTensor};
+use std::ffi::c_void;
+
+/// DLPack tensor information extracted from a PyCapsule
+///
+/// This struct owns the DLManagedTensor pointer and ensures proper cleanup
+/// via the DLPack deleter when dropped (RAII pattern).
+pub struct DLPackTensorInfo {
+ /// Raw DLManagedTensor pointer from PyTorch DLPack capsule
+ /// This is owned by this struct and will be freed via deleter on drop
+ pub managed_ptr: *mut DLManagedTensor,
+ /// Data pointer inside dl_tensor (GPU memory, owned by managed_ptr)
+ pub data_ptr: *const c_void,
+ pub shape: Vec<i64>,
+ /// CUDA device ID from DLPack metadata.
+ /// Used for defensive validation against PyTorch API device ID.
+ pub device_id: i32,
+}
+
+impl Drop for DLPackTensorInfo {
+ fn drop(&mut self) {
+ unsafe {
+ if !self.managed_ptr.is_null() {
+ // Per DLPack protocol: consumer must call deleter exactly once
+ if let Some(deleter) = (*self.managed_ptr).deleter {
+ deleter(self.managed_ptr);
+ }
+ // Prevent double-free
+ self.managed_ptr = std::ptr::null_mut();
+ }
+ }
+ }
+}
+
+/// Extract GPU pointer from PyTorch tensor's __dlpack__() capsule
+///
+/// Uses the DLPack protocol to obtain a zero-copy view of the tensor's GPU memory.
+/// The returned `DLPackTensorInfo` owns the DLManagedTensor and will automatically
+/// call the deleter when dropped, ensuring proper resource cleanup.
+///
+/// # Safety
+/// The returned `data_ptr` points to GPU memory owned by the source tensor.
+/// The caller must ensure the source tensor remains alive and unmodified
+/// for the entire duration that `data_ptr` is in use. Python's GIL ensures
+/// the tensor won't be garbage collected during `encode()`, but the caller
+/// must not deallocate or resize the tensor while encoding is in progress.
+pub fn extract_dlpack_tensor(
+ _py: Python<'_>,
+ tensor: &Bound<'_, PyAny>,
+) -> PyResult<DLPackTensorInfo> {
+ // Call tensor.__dlpack__() to get PyCapsule
+    // Note: PyTorch's __dlpack__ uses the default stream when called without arguments
+ let capsule = tensor.call_method0("__dlpack__")?;
+
+ const DLTENSOR_NAME: &[u8] = b"dltensor\0";
+
+    // SAFETY: capsule is a valid PyCapsule from tensor.__dlpack__(). DLTENSOR_NAME is a
+    // null-terminated C string for the lifetime of the call. We only read the capsule
+    // and call PyCapsule_IsValid / PyCapsule_GetPointer; we do not invalidate the capsule.
+ let managed_ptr = unsafe {
+ let capsule_ptr = capsule.as_ptr();
+        if ffi::PyCapsule_IsValid(capsule_ptr, DLTENSOR_NAME.as_ptr() as *const i8) == 0 {
+ return Err(PyRuntimeError::new_err(
+ "Invalid DLPack capsule (expected 'dltensor')",
+ ));
+ }
+        let ptr = ffi::PyCapsule_GetPointer(capsule_ptr, DLTENSOR_NAME.as_ptr() as *const i8)
+ as *mut DLManagedTensor;
+ if ptr.is_null() {
+ return Err(PyRuntimeError::new_err(
+ "Failed to extract DLManagedTensor from PyCapsule",
+ ));
+ }
+ ptr
+ };
+
+    // SAFETY: managed_ptr is non-null and was returned by PyCapsule_GetPointer for a valid
+    // "dltensor" capsule, so it points to a valid DLManagedTensor. The capsule (and thus
+    // the tensor) is held by the caller for the duration of this function. We read fields
+    // and create slices from shape/strides only when non-null and ndim is valid.
+ unsafe {
+ let dl_tensor = &(*managed_ptr).dl_tensor;
+
+ if dl_tensor.data.is_null() {
+ return Err(PyRuntimeError::new_err(
+ "DLPack tensor has null data pointer",
+ ));
+ }
+
+ if dl_tensor.device.device_type != DLDeviceType::kDLCUDA {
+ return Err(PyRuntimeError::new_err(
+ "DLPack tensor must be on CUDA device",
+ ));
+ }
+
+ if dl_tensor.dtype.code != DL_FLOAT
+ || dl_tensor.dtype.bits != 64
+ || dl_tensor.dtype.lanes != 1
+ {
+ return Err(PyRuntimeError::new_err(format!(
+ "DLPack tensor must be float64 (code={}, bits={}, lanes={})",
+                dl_tensor.dtype.code, dl_tensor.dtype.bits, dl_tensor.dtype.lanes
+ )));
+ }
+
+ if !dl_tensor
+ .byte_offset
+ .is_multiple_of(std::mem::size_of::<f64>() as u64)
+ {
+ return Err(PyRuntimeError::new_err(
+ "DLPack tensor byte_offset is not aligned for float64",
+ ));
+ }
+
+        let data_ptr =
+            (dl_tensor.data as *const u8).add(dl_tensor.byte_offset as usize) as *const f64;
+
+ let ndim = dl_tensor.ndim as usize;
+        // SAFETY: shape pointer is valid for ndim elements when non-null (DLPack contract).
+ let shape = if ndim > 0 && !dl_tensor.shape.is_null() {
+ std::slice::from_raw_parts(dl_tensor.shape, ndim).to_vec()
+ } else {
+ vec![]
+ };
+
+ if ndim == 0 || shape.is_empty() {
+ return Err(PyRuntimeError::new_err(
+ "DLPack tensor must have at least 1 dimension",
+ ));
+ }
+
+ if !dl_tensor.strides.is_null() {
+            // SAFETY: strides pointer is valid for ndim elements (DLPack contract).
+ let strides = std::slice::from_raw_parts(dl_tensor.strides, ndim);
+ match ndim {
+ 1 => {
+ let expected = 1_i64;
+ if strides[0] != expected {
+ return Err(PyRuntimeError::new_err(format!(
+ "DLPack tensor must be contiguous: stride[0]={},
expected {}",
+ strides[0], expected
+ )));
+ }
+ }
+ 2 => {
+ if shape.len() < 2 {
+ return Err(PyRuntimeError::new_err(
+ "DLPack tensor must be contiguous (shape len < 2)",
+ ));
+ }
+ let expected_stride_1 = 1_i64;
+ let expected_stride_0 = shape[1];
+ if strides[1] != expected_stride_1 || strides[0] !=
expected_stride_0 {
+ return Err(PyRuntimeError::new_err(format!(
+ "DLPack tensor must be contiguous: strides=[{},
{}], expected [{}, {}] (expected[1]=shape[1])",
+ strides[0], strides[1], expected_stride_0,
expected_stride_1
+ )));
+ }
+ }
+ _ => {
+ return Err(PyRuntimeError::new_err(
+ "DLPack tensor must be 1D or 2D for encoding",
+ ));
+ }
+ }
+ }
+
+ let device_id = dl_tensor.device.device_id;
+
+ const USED_DLTENSOR_NAME: &[u8] = b"used_dltensor\0";
+        // SAFETY: capsule is the same PyCapsule we used above; renaming is allowed and does not free it.
+        ffi::PyCapsule_SetName(capsule.as_ptr(), USED_DLTENSOR_NAME.as_ptr() as *const i8);
+
+ Ok(DLPackTensorInfo {
+ managed_ptr,
+ data_ptr: data_ptr as *const std::ffi::c_void,
+ shape,
+ device_id,
+ })
+ }
+}
diff --git a/qdp/qdp-python/src/lib.rs b/qdp/qdp-python/src/engine.rs
similarity index 51%
copy from qdp/qdp-python/src/lib.rs
copy to qdp/qdp-python/src/engine.rs
index 12335cb72..29f56909a 100644
--- a/qdp/qdp-python/src/lib.rs
+++ b/qdp/qdp-python/src/engine.rs
@@ -14,555 +14,26 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+use crate::dlpack::extract_dlpack_tensor;
+use crate::pytorch::{
+ extract_cuda_tensor_info, get_tensor_device_id, get_torch_cuda_stream_ptr,
is_cuda_tensor,
+ is_pytorch_tensor, validate_cuda_tensor_for_encoding, validate_shape,
validate_tensor,
+};
+use crate::tensor::QuantumTensor;
use numpy::{PyReadonlyArray1, PyReadonlyArray2, PyUntypedArrayMethods};
use pyo3::exceptions::PyRuntimeError;
-use pyo3::ffi;
use pyo3::prelude::*;
-use qdp_core::dlpack::{DL_FLOAT, DLDeviceType, DLManagedTensor};
use qdp_core::{Precision, QdpEngine as CoreEngine};
-use std::ffi::c_void;
-/// Quantum tensor wrapper implementing DLPack protocol
-///
-/// This class wraps a GPU-allocated quantum state vector and implements
-/// the DLPack protocol for zero-copy integration with PyTorch and other
-/// array libraries.
-///
-/// Example:
-/// >>> engine = QdpEngine(device_id=0)
-/// >>> qtensor = engine.encode([1.0, 2.0, 3.0], num_qubits=2,
encoding_method="amplitude")
-/// >>> torch_tensor = torch.from_dlpack(qtensor)
-#[pyclass]
-struct QuantumTensor {
- ptr: *mut DLManagedTensor,
- consumed: bool,
-}
-
-#[pymethods]
-impl QuantumTensor {
- /// Implements DLPack protocol - returns PyCapsule for PyTorch
- ///
- /// This method is called by torch.from_dlpack() to get the GPU memory
pointer.
- /// The capsule can only be consumed once to prevent double-free errors.
- ///
- /// Args:
- /// stream: Optional CUDA stream (DLPack 0.8+; 1=legacy default,
2=per-thread default)
- ///
- /// Returns:
- /// PyCapsule containing DLManagedTensor pointer
- ///
- /// Raises:
- /// RuntimeError: If the tensor has already been consumed
- #[pyo3(signature = (stream=None))]
- fn __dlpack__<'py>(&mut self, py: Python<'py>, stream: Option<i64>) ->
PyResult<Py<PyAny>> {
- if self.consumed {
- return Err(PyRuntimeError::new_err(
- "DLPack tensor already consumed (can only be used once)",
- ));
- }
-
- if self.ptr.is_null() {
- return Err(PyRuntimeError::new_err("Invalid DLPack tensor
pointer"));
- }
-
- if let Some(stream) = stream
- && stream > 0
- {
- let stream_ptr = qdp_core::dlpack::dlpack_stream_to_cuda(stream);
- unsafe {
- qdp_core::dlpack::synchronize_stream(stream_ptr).map_err(|e| {
- PyRuntimeError::new_err(format!("CUDA stream sync failed:
{}", e))
- })?;
- }
- }
-
- // Mark as consumed to prevent double-free
- self.consumed = true;
-
- // Create PyCapsule using FFI
- // PyTorch will call the deleter stored in DLManagedTensor.deleter
- // Use a static C string for the capsule name to avoid lifetime issues
- const DLTENSOR_NAME: &[u8] = b"dltensor\0";
-
- unsafe {
- // Create PyCapsule without a destructor
- // PyTorch will manually call the deleter from DLManagedTensor
- let capsule_ptr = ffi::PyCapsule_New(
- self.ptr as *mut std::ffi::c_void,
- DLTENSOR_NAME.as_ptr() as *const i8,
- None, // No destructor - PyTorch handles it
- );
-
- if capsule_ptr.is_null() {
- return Err(PyRuntimeError::new_err("Failed to create
PyCapsule"));
- }
-
- Ok(Py::from_owned_ptr(py, capsule_ptr))
- }
- }
-
- /// Returns DLPack device information
- ///
- /// Returns:
- /// Tuple of (device_type, device_id) where device_type=2 for CUDA
- fn __dlpack_device__(&self) -> PyResult<(i32, i32)> {
- if self.ptr.is_null() {
- return Err(PyRuntimeError::new_err("Invalid DLPack tensor
pointer"));
- }
-
- unsafe {
- let tensor = &(*self.ptr).dl_tensor;
- // DLPack device_type: kDLCUDA = 2, kDLCPU = 1
- let device_type = match tensor.device.device_type {
- qdp_core::dlpack::DLDeviceType::kDLCUDA => 2,
- qdp_core::dlpack::DLDeviceType::kDLCPU => 1,
- };
- // Read device_id from DLPack tensor metadata
- Ok((device_type, tensor.device.device_id))
- }
- }
-}
-
-impl Drop for QuantumTensor {
- fn drop(&mut self) {
- // Only free if not consumed by __dlpack__
- // If consumed, PyTorch/consumer will call the deleter
- if !self.consumed && !self.ptr.is_null() {
- unsafe {
- // Defensive check: qdp-core always provides a deleter
- debug_assert!(
- (*self.ptr).deleter.is_some(),
- "DLManagedTensor from qdp-core should always have a
deleter"
- );
-
- // Call the DLPack deleter to free memory
- if let Some(deleter) = (*self.ptr).deleter {
- deleter(self.ptr);
- }
- }
- }
- }
-}
-
-// Safety: QuantumTensor can be sent between threads
-// The DLManagedTensor pointer management is thread-safe via Arc in the deleter
-unsafe impl Send for QuantumTensor {}
-unsafe impl Sync for QuantumTensor {}
-
-/// Helper to detect PyTorch tensor
-fn is_pytorch_tensor(obj: &Bound<'_, PyAny>) -> PyResult<bool> {
- let type_obj = obj.get_type();
- let name = type_obj.name()?;
- if name != "Tensor" {
- return Ok(false);
- }
- let module = type_obj.module()?;
- let module_name = module.to_str()?;
- Ok(module_name == "torch")
-}
-
-/// Helper to validate CPU tensor
-fn validate_tensor(tensor: &Bound<'_, PyAny>) -> PyResult<()> {
- if !is_pytorch_tensor(tensor)? {
- return Err(PyRuntimeError::new_err("Object is not a PyTorch Tensor"));
- }
-
- let device = tensor.getattr("device")?;
- let device_type: String = device.getattr("type")?.extract()?;
-
- if device_type != "cpu" {
- return Err(PyRuntimeError::new_err(format!(
- "Only CPU tensors are currently supported for this path. Got
device: {}",
- device_type
- )));
- }
-
- Ok(())
-}
-
-/// Check if a PyTorch tensor is on a CUDA device
-fn is_cuda_tensor(tensor: &Bound<'_, PyAny>) -> PyResult<bool> {
- let device = tensor.getattr("device")?;
- let device_type: String = device.getattr("type")?.extract()?;
- Ok(device_type == "cuda")
-}
-
-/// Validate array/tensor shape (must be 1D or 2D)
-///
-/// Args:
-/// ndim: Number of dimensions
-/// context: Context string for error message (e.g., "array", "tensor",
"CUDA tensor")
-///
-/// Returns:
-/// Ok(()) if shape is valid (1D or 2D), otherwise returns an error
-fn validate_shape(ndim: usize, context: &str) -> PyResult<()> {
- match ndim {
- 1 | 2 => Ok(()),
- _ => {
- let item_type = if context.contains("array") {
- "array"
- } else {
- "tensor"
- };
- Err(PyRuntimeError::new_err(format!(
- "Unsupported {} shape: {}D. Expected 1D {} for single sample \
- encoding or 2D {} (batch_size, features) for batch encoding.",
- context, ndim, item_type, item_type
- )))
- }
- }
-}
-
-/// Get the CUDA device index from a PyTorch tensor
-fn get_tensor_device_id(tensor: &Bound<'_, PyAny>) -> PyResult<i32> {
- let device = tensor.getattr("device")?;
- let device_index: i32 = device.getattr("index")?.extract()?;
- Ok(device_index)
-}
-
-/// Get the current CUDA stream pointer for the tensor's device.
-fn get_torch_cuda_stream_ptr(tensor: &Bound<'_, PyAny>) -> PyResult<*mut
c_void> {
- let py = tensor.py();
- let torch = PyModule::import(py, "torch")
- .map_err(|_| PyRuntimeError::new_err("Failed to import torch
module"))?;
- let cuda = torch.getattr("cuda")?;
- let device = tensor.getattr("device")?;
- let stream = cuda.call_method1("current_stream", (device,))?;
-
- // Defensive validation: ensure the stream is a CUDA stream on the same
device
- let stream_device = stream.getattr("device").map_err(|_| {
- PyRuntimeError::new_err("CUDA stream object from PyTorch is missing
'device' attribute")
- })?;
- let stream_device_type: String = stream_device
- .getattr("type")
- .and_then(|obj| obj.extract())
- .map_err(|_| {
- PyRuntimeError::new_err(
- "Failed to extract CUDA stream device type from PyTorch
stream.device",
- )
- })?;
- if stream_device_type != "cuda" {
- return Err(PyRuntimeError::new_err(format!(
- "Expected CUDA stream device type 'cuda', got '{}'",
- stream_device_type
- )));
- }
-
- let stream_device_index: i32 = stream_device
- .getattr("index")
- .and_then(|obj| obj.extract())
- .map_err(|_| {
- PyRuntimeError::new_err(
- "Failed to extract CUDA stream device index from PyTorch
stream.device",
- )
- })?;
- let tensor_device_index = get_tensor_device_id(tensor)?;
- if stream_device_index != tensor_device_index {
- return Err(PyRuntimeError::new_err(format!(
- "CUDA stream device index ({}) does not match tensor device index
({})",
- stream_device_index, tensor_device_index
- )));
- }
-
- let stream_ptr: u64 = stream.getattr("cuda_stream")?.extract()?;
- Ok(if stream_ptr == 0 {
- std::ptr::null_mut()
- } else {
- stream_ptr as *mut c_void
- })
-}
-
-/// Validate a CUDA tensor for direct GPU encoding
-/// Checks: dtype matches encoding method, contiguous, non-empty, device_id
matches engine
-fn validate_cuda_tensor_for_encoding(
- tensor: &Bound<'_, PyAny>,
- expected_device_id: usize,
- encoding_method: &str,
-) -> PyResult<()> {
- let method = encoding_method.to_ascii_lowercase();
-
- // Check encoding method support and dtype (ASCII lowercase for
case-insensitive match).
- let dtype = tensor.getattr("dtype")?;
- let dtype_str: String = dtype.str()?.extract()?;
- let dtype_str_lower = dtype_str.to_ascii_lowercase();
- match method.as_str() {
- "amplitude" => {
- if !(dtype_str_lower.contains("float64") ||
dtype_str_lower.contains("float32")) {
- return Err(PyRuntimeError::new_err(format!(
- "CUDA tensor must have dtype float64 or float32 for
amplitude encoding, got {}. \
- Use tensor.to(torch.float64) or tensor.to(torch.float32)",
- dtype_str
- )));
- }
- }
- "angle" => {
- if !dtype_str_lower.contains("float64") {
- return Err(PyRuntimeError::new_err(format!(
- "CUDA tensor must have dtype float64 for {} encoding, got
{}. \
- Use tensor.to(torch.float64)",
- method, dtype_str
- )));
- }
- }
- "basis" => {
- if !dtype_str_lower.contains("int64") {
- return Err(PyRuntimeError::new_err(format!(
- "CUDA tensor must have dtype int64 for basis encoding, got
{}. \
- Use tensor.to(torch.int64)",
- dtype_str
- )));
- }
- }
- _ => {
- return Err(PyRuntimeError::new_err(format!(
- "CUDA tensor encoding currently only supports 'amplitude',
'angle', or 'basis' methods, got '{}'. \
- Use tensor.cpu() to convert to CPU tensor for other encoding
methods.",
- encoding_method
- )));
- }
- }
-
- // Check contiguous
- let is_contiguous: bool = tensor.call_method0("is_contiguous")?.extract()?;
- if !is_contiguous {
- return Err(PyRuntimeError::new_err(
- "CUDA tensor must be contiguous. Use tensor.contiguous()",
- ));
- }
-
- // Check non-empty
- let numel: usize = tensor.call_method0("numel")?.extract()?;
- if numel == 0 {
- return Err(PyRuntimeError::new_err("CUDA tensor cannot be empty"));
- }
-
- // Check device matches engine
- let tensor_device_id = get_tensor_device_id(tensor)?;
- if tensor_device_id as usize != expected_device_id {
- return Err(PyRuntimeError::new_err(format!(
- "Device mismatch: tensor is on cuda:{}, but engine is on cuda:{}. \
- Move tensor with tensor.to('cuda:{}')",
- tensor_device_id, expected_device_id, expected_device_id
- )));
- }
-
- Ok(())
-}
-
-/// Minimal CUDA tensor metadata extracted via PyTorch APIs.
-struct CudaTensorInfo {
- data_ptr: *const f64,
- shape: Vec<i64>,
-}
-
-/// Extract GPU pointer and shape directly from a PyTorch CUDA tensor.
-///
-/// # Safety
-/// The returned pointer is borrowed from the source tensor. The caller must
-/// ensure the tensor remains alive and unmodified for the duration of use.
-fn extract_cuda_tensor_info(tensor: &Bound<'_, PyAny>) ->
PyResult<CudaTensorInfo> {
- let data_ptr: u64 = tensor.call_method0("data_ptr")?.extract()?;
- if data_ptr == 0 {
- return Err(PyRuntimeError::new_err(
- "PyTorch returned a null data pointer for CUDA tensor",
- ));
- }
-
- let ndim: usize = tensor.call_method0("dim")?.extract()?;
- let mut shape = Vec::with_capacity(ndim);
- for axis in 0..ndim {
- let dim: i64 = tensor.call_method1("size", (axis,))?.extract()?;
- shape.push(dim);
- }
-
- Ok(CudaTensorInfo {
- data_ptr: data_ptr as *const f64,
- shape,
- })
-}
-
-/// DLPack tensor information extracted from a PyCapsule
-///
-/// This struct owns the DLManagedTensor pointer and ensures proper cleanup
-/// via the DLPack deleter when dropped (RAII pattern).
-struct DLPackTensorInfo {
- /// Raw DLManagedTensor pointer from PyTorch DLPack capsule
- /// This is owned by this struct and will be freed via deleter on drop
- managed_ptr: *mut DLManagedTensor,
- /// Data pointer inside dl_tensor (GPU memory, owned by managed_ptr)
- data_ptr: *const c_void,
- shape: Vec<i64>,
- /// CUDA device ID from DLPack metadata.
- /// Used for defensive validation against PyTorch API device ID.
- device_id: i32,
-}
-
-impl Drop for DLPackTensorInfo {
- fn drop(&mut self) {
- unsafe {
- if !self.managed_ptr.is_null() {
- // Per DLPack protocol: consumer must call deleter exactly once
- if let Some(deleter) = (*self.managed_ptr).deleter {
- deleter(self.managed_ptr);
- }
- // Prevent double-free
- self.managed_ptr = std::ptr::null_mut();
- }
- }
- }
-}
-
-/// Extract GPU pointer from PyTorch tensor's __dlpack__() capsule
-///
-/// Uses the DLPack protocol to obtain a zero-copy view of the tensor's GPU
memory.
-/// The returned `DLPackTensorInfo` owns the DLManagedTensor and will
automatically
-/// call the deleter when dropped, ensuring proper resource cleanup.
-///
-/// # Safety
-/// The returned `data_ptr` points to GPU memory owned by the source tensor.
-/// The caller must ensure the source tensor remains alive and unmodified
-/// for the entire duration that `data_ptr` is in use. Python's GIL ensures
-/// the tensor won't be garbage collected during `encode()`, but the caller
-/// must not deallocate or resize the tensor while encoding is in progress.
-fn extract_dlpack_tensor(_py: Python<'_>, tensor: &Bound<'_, PyAny>) ->
PyResult<DLPackTensorInfo> {
- // Call tensor.__dlpack__() to get PyCapsule
- // Note: PyTorch's __dlpack__ uses the default stream when called without
arguments
- let capsule = tensor.call_method0("__dlpack__")?;
-
- const DLTENSOR_NAME: &[u8] = b"dltensor\0";
-
- // SAFETY: capsule is a valid PyCapsule from tensor.__dlpack__().
DLTENSOR_NAME is a
- // null-terminated C string for the lifetime of the call. We only read the
capsule
- // and call PyCapsule_IsValid / PyCapsule_GetPointer; we do not invalidate
the capsule.
- let managed_ptr = unsafe {
- let capsule_ptr = capsule.as_ptr();
- if ffi::PyCapsule_IsValid(capsule_ptr, DLTENSOR_NAME.as_ptr() as
*const i8) == 0 {
- return Err(PyRuntimeError::new_err(
- "Invalid DLPack capsule (expected 'dltensor')",
- ));
- }
- let ptr = ffi::PyCapsule_GetPointer(capsule_ptr,
DLTENSOR_NAME.as_ptr() as *const i8)
- as *mut DLManagedTensor;
- if ptr.is_null() {
- return Err(PyRuntimeError::new_err(
- "Failed to extract DLManagedTensor from PyCapsule",
- ));
- }
- ptr
- };
-
- // SAFETY: managed_ptr is non-null and was returned by
PyCapsule_GetPointer for a valid
- // "dltensor" capsule, so it points to a valid DLManagedTensor. The
capsule (and thus
- // the tensor) is held by the caller for the duration of this function. We
read fields
- // and create slices from shape/strides only when non-null and ndim is
valid.
- unsafe {
- let dl_tensor = &(*managed_ptr).dl_tensor;
-
- if dl_tensor.data.is_null() {
- return Err(PyRuntimeError::new_err(
- "DLPack tensor has null data pointer",
- ));
- }
-
- if dl_tensor.device.device_type != DLDeviceType::kDLCUDA {
- return Err(PyRuntimeError::new_err(
- "DLPack tensor must be on CUDA device",
- ));
- }
-
- if dl_tensor.dtype.code != DL_FLOAT
- || dl_tensor.dtype.bits != 64
- || dl_tensor.dtype.lanes != 1
- {
- return Err(PyRuntimeError::new_err(format!(
- "DLPack tensor must be float64 (code={}, bits={}, lanes={})",
- dl_tensor.dtype.code, dl_tensor.dtype.bits,
dl_tensor.dtype.lanes
- )));
- }
-
- if !dl_tensor
- .byte_offset
- .is_multiple_of(std::mem::size_of::<f64>() as u64)
- {
- return Err(PyRuntimeError::new_err(
- "DLPack tensor byte_offset is not aligned for float64",
- ));
- }
-
- let data_ptr =
- (dl_tensor.data as *const u8).add(dl_tensor.byte_offset as usize)
as *const f64;
-
- let ndim = dl_tensor.ndim as usize;
- // SAFETY: shape pointer is valid for ndim elements when non-null
(DLPack contract).
- let shape = if ndim > 0 && !dl_tensor.shape.is_null() {
- std::slice::from_raw_parts(dl_tensor.shape, ndim).to_vec()
- } else {
- vec![]
- };
-
- if ndim == 0 || shape.is_empty() {
- return Err(PyRuntimeError::new_err(
- "DLPack tensor must have at least 1 dimension",
- ));
- }
-
- if !dl_tensor.strides.is_null() {
- // SAFETY: strides pointer is valid for ndim elements (DLPack
contract).
- let strides = std::slice::from_raw_parts(dl_tensor.strides, ndim);
- match ndim {
- 1 => {
- let expected = 1_i64;
- if strides[0] != expected {
- return Err(PyRuntimeError::new_err(format!(
- "DLPack tensor must be contiguous: stride[0]={},
expected {}",
- strides[0], expected
- )));
- }
- }
- 2 => {
- if shape.len() < 2 {
- return Err(PyRuntimeError::new_err(
- "DLPack tensor must be contiguous (shape len < 2)",
- ));
- }
- let expected_stride_1 = 1_i64;
- let expected_stride_0 = shape[1];
- if strides[1] != expected_stride_1 || strides[0] !=
expected_stride_0 {
- return Err(PyRuntimeError::new_err(format!(
- "DLPack tensor must be contiguous: strides=[{},
{}], expected [{}, {}] (expected[1]=shape[1])",
- strides[0], strides[1], expected_stride_0,
expected_stride_1
- )));
- }
- }
- _ => {
- return Err(PyRuntimeError::new_err(
- "DLPack tensor must be 1D or 2D for encoding",
- ));
- }
- }
- }
-
- let device_id = dl_tensor.device.device_id;
-
- const USED_DLTENSOR_NAME: &[u8] = b"used_dltensor\0";
- // SAFETY: capsule is the same PyCapsule we used above; renaming is
allowed and does not free it.
- ffi::PyCapsule_SetName(capsule.as_ptr(), USED_DLTENSOR_NAME.as_ptr()
as *const i8);
-
- Ok(DLPackTensorInfo {
- managed_ptr,
- data_ptr: data_ptr as *const std::ffi::c_void,
- shape,
- device_id,
- })
- }
-}
+#[cfg(target_os = "linux")]
+use crate::loader::{PyQuantumLoader, config_from_args, path_from_py};
/// PyO3 wrapper for QdpEngine
///
/// Provides Python bindings for GPU-accelerated quantum state encoding.
#[pyclass]
-struct QdpEngine {
- engine: CoreEngine,
+pub struct QdpEngine {
+ pub engine: CoreEngine,
}
#[pymethods]
@@ -990,143 +461,28 @@ impl QdpEngine {
})
}
- // --- Loader factory methods (Linux only) ---
- #[cfg(target_os = "linux")]
- /// Create a synthetic-data pipeline iterator (for
QuantumDataLoader.source_synthetic()).
- #[pyo3(signature = (total_batches, batch_size, num_qubits,
encoding_method, seed=None))]
- fn create_synthetic_loader(
- &self,
- total_batches: usize,
- batch_size: usize,
- num_qubits: u32,
- encoding_method: &str,
- seed: Option<u64>,
- ) -> PyResult<PyQuantumLoader> {
- let config = config_from_args(
- &self.engine,
- batch_size,
- num_qubits,
- encoding_method,
- total_batches,
- seed,
- );
- let iter =
qdp_core::PipelineIterator::new_synthetic(self.engine.clone(), config).map_err(
- |e| PyRuntimeError::new_err(format!("create_synthetic_loader
failed: {}", e)),
- )?;
- Ok(PyQuantumLoader::new(Some(iter)))
- }
-
- #[cfg(target_os = "linux")]
- /// Create a file-backed pipeline iterator (full read then batch; for
QuantumDataLoader.source_file(path)).
- #[pyo3(signature = (path, batch_size, num_qubits, encoding_method,
batch_limit=None))]
- fn create_file_loader(
- &self,
- py: Python<'_>,
- path: &Bound<'_, PyAny>,
- batch_size: usize,
- num_qubits: u32,
- encoding_method: &str,
- batch_limit: Option<usize>,
- ) -> PyResult<PyQuantumLoader> {
- let path_str = path_from_py(path)?;
- let batch_limit = batch_limit.unwrap_or(usize::MAX);
- let config = config_from_args(
- &self.engine,
- batch_size,
- num_qubits,
- encoding_method,
- 0,
- None,
- );
- let engine = self.engine.clone();
- let iter = py
- .detach(|| {
- qdp_core::PipelineIterator::new_from_file(
- engine,
- path_str.as_str(),
- config,
- batch_limit,
- )
- })
- .map_err(|e| PyRuntimeError::new_err(format!("create_file_loader
failed: {}", e)))?;
- Ok(PyQuantumLoader::new(Some(iter)))
- }
-
- #[cfg(target_os = "linux")]
- /// Create a streaming Parquet pipeline iterator (for
QuantumDataLoader.source_file(path, streaming=True)).
- #[pyo3(signature = (path, batch_size, num_qubits, encoding_method,
batch_limit=None))]
- fn create_streaming_file_loader(
- &self,
- py: Python<'_>,
- path: &Bound<'_, PyAny>,
- batch_size: usize,
- num_qubits: u32,
- encoding_method: &str,
- batch_limit: Option<usize>,
- ) -> PyResult<PyQuantumLoader> {
- let path_str = path_from_py(path)?;
- let batch_limit = batch_limit.unwrap_or(usize::MAX);
- let config = config_from_args(
- &self.engine,
- batch_size,
- num_qubits,
- encoding_method,
- 0,
- None,
- );
- let engine = self.engine.clone();
- let iter = py
- .detach(|| {
- qdp_core::PipelineIterator::new_from_file_streaming(
- engine,
- path_str.as_str(),
- config,
- batch_limit,
- )
- })
- .map_err(|e| {
- PyRuntimeError::new_err(format!("create_streaming_file_loader
failed: {}", e))
- })?;
- Ok(PyQuantumLoader::new(Some(iter)))
- }
-
/// Encode directly from a PyTorch CUDA tensor. Internal helper.
///
    /// Dispatches to the core f32 GPU pointer API for 1D float32 amplitude encoding,
    /// or to the float64/basis GPU pointer APIs for other dtypes and batch encoding.
- ///
- /// Args:
- /// data: PyTorch CUDA tensor
- /// num_qubits: Number of qubits
- /// encoding_method: Encoding strategy (currently only "amplitude")
fn _encode_from_cuda_tensor(
&self,
data: &Bound<'_, PyAny>,
num_qubits: usize,
encoding_method: &str,
) -> PyResult<QuantumTensor> {
- // Validate CUDA tensor for direct GPU encoding (shape, contiguity,
device, dtype)
        validate_cuda_tensor_for_encoding(data, self.engine.device().ordinal(), encoding_method)?;
- // Determine dtype for dispatch (float32 vs float64, etc.).
let dtype = data.getattr("dtype")?;
let dtype_str: String = dtype.str()?.extract()?;
let dtype_str_lower = dtype_str.to_ascii_lowercase();
let is_f32 = dtype_str_lower.contains("float32");
let method = encoding_method.to_ascii_lowercase();
-
- // Current f32 CUDA path only supports amplitude encoding for 1D
tensors.
let ndim: usize = data.call_method0("dim")?.extract()?;
if method.as_str() == "amplitude" && is_f32 {
- // NOTE: This f32 fast path intentionally bypasses
`extract_cuda_tensor_info`/DLPack
- // and uses PyTorch's `data_ptr()`/`numel()` directly, after
- // `validate_cuda_tensor_for_encoding` has already enforced
dtype/shape/contiguity/device.
- // If additional validation is added to `extract_cuda_tensor_info`
in the future, it must
- // be mirrored here to keep behavior consistent.
match ndim {
1 => {
- // 1D CUDA tensor, float32 amplitude encoding using core
f32 GPU pointer API.
let input_len: usize =
data.call_method0("numel")?.extract()?;
let stream_ptr = get_torch_cuda_stream_ptr(data)?;
let data_ptr_u64: u64 =
data.call_method0("data_ptr")?.extract()?;
@@ -1161,17 +517,12 @@ impl QdpEngine {
))),
}
} else {
- // Existing float64 (and basis/int64) CUDA path using direct GPU
pointer.
let tensor_info = extract_cuda_tensor_info(data)?;
let stream_ptr = get_torch_cuda_stream_ptr(data)?;
match ndim {
1 => {
- // 1D CUDA tensor: single sample encoding
let input_len = tensor_info.shape[0] as usize;
- // SAFETY: tensor_info.data_ptr was obtained via PyTorch's
data_ptr() from a
- // valid CUDA tensor. The tensor remains alive during this
call
- // (held by Python's GIL), and we validated
dtype/contiguity/device above.
let ptr = unsafe {
self.engine
.encode_from_gpu_ptr_with_stream(
@@ -1191,10 +542,8 @@ impl QdpEngine {
})
}
2 => {
- // 2D CUDA tensor: batch encoding
let num_samples = tensor_info.shape[0] as usize;
let sample_size = tensor_info.shape[1] as usize;
- // SAFETY: Same as above - pointer from validated PyTorch
CUDA tensor
let ptr = unsafe {
self.engine
.encode_batch_from_gpu_ptr_with_stream(
@@ -1222,106 +571,104 @@ impl QdpEngine {
}
}
}
-}
-
-// --- Loader bindings (Linux only; qdp-core pipeline types only built on
Linux) ---
-#[cfg(target_os = "linux")]
-mod loader_bindings {
- use super::*;
- use pyo3::exceptions::PyStopIteration;
- use qdp_core::{PipelineConfig, PipelineIterator};
-
- /// Rust-backed iterator yielding one QuantumTensor per batch; used by
QuantumDataLoader.
- #[pyclass]
- pub struct PyQuantumLoader {
- inner: Option<PipelineIterator>,
- }
-
- impl PyQuantumLoader {
- pub fn new(inner: Option<PipelineIterator>) -> Self {
- Self { inner }
- }
- }
-
- #[pymethods]
- impl PyQuantumLoader {
- fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
- slf
- }
-
- fn __next__(mut slf: PyRefMut<'_, Self>) -> PyResult<QuantumTensor> {
- let mut iter: PipelineIterator = match slf.inner.take() {
- Some(i) => i,
- None => return Err(PyStopIteration::new_err("")),
- };
- // Call next_batch without releasing GIL (return type *mut
DLManagedTensor is !Send).
- let result = iter.next_batch();
- match result {
- Ok(Some(ptr)) => {
- slf.inner = Some(iter);
- Ok(QuantumTensor {
- ptr,
- consumed: false,
- })
- }
- Ok(None) => {
- // Exhausted; do not put iterator back
- Err(PyStopIteration::new_err(""))
- }
- Err(e) => {
- slf.inner = Some(iter);
- Err(PyRuntimeError::new_err(format!(
- "Pipeline next_batch failed: {}",
- e
- )))
- }
- }
- }
- }
- /// Build PipelineConfig from Python args. device_id is 0 (engine does not
expose it); iterator uses engine clone with correct device.
- pub fn config_from_args(
- _engine: &CoreEngine,
+ // --- Loader factory methods (Linux only) ---
+ #[cfg(target_os = "linux")]
+ /// Create a synthetic-data pipeline iterator (for
QuantumDataLoader.source_synthetic()).
+ #[pyo3(signature = (total_batches, batch_size, num_qubits,
encoding_method, seed=None))]
+ fn create_synthetic_loader(
+ &self,
+ total_batches: usize,
batch_size: usize,
num_qubits: u32,
encoding_method: &str,
- total_batches: usize,
seed: Option<u64>,
- ) -> PipelineConfig {
- PipelineConfig {
- device_id: 0,
- num_qubits,
+ ) -> PyResult<PyQuantumLoader> {
+ let config = config_from_args(
+ &self.engine,
batch_size,
+ num_qubits,
+ encoding_method,
total_batches,
- encoding_method: encoding_method.to_string(),
seed,
- warmup_batches: 0,
- }
+ );
+ let iter =
qdp_core::PipelineIterator::new_synthetic(self.engine.clone(), config).map_err(
+ |e| PyRuntimeError::new_err(format!("create_synthetic_loader
failed: {}", e)),
+ )?;
+ Ok(PyQuantumLoader::new(Some(iter)))
}
- /// Resolve path from Python str or pathlib.Path (__fspath__).
- pub fn path_from_py(path: &Bound<'_, PyAny>) -> PyResult<String> {
- path.extract::<String>().or_else(|_| {
- path.call_method0("__fspath__")
- .and_then(|m| m.extract::<String>())
- })
+ #[cfg(target_os = "linux")]
+ /// Create a file-backed pipeline iterator (full read then batch; for
QuantumDataLoader.source_file(path)).
+ #[pyo3(signature = (path, batch_size, num_qubits, encoding_method,
batch_limit=None))]
+ fn create_file_loader(
+ &self,
+ py: Python<'_>,
+ path: &Bound<'_, PyAny>,
+ batch_size: usize,
+ num_qubits: u32,
+ encoding_method: &str,
+ batch_limit: Option<usize>,
+ ) -> PyResult<PyQuantumLoader> {
+ let path_str = path_from_py(path)?;
+ let batch_limit = batch_limit.unwrap_or(usize::MAX);
+ let config = config_from_args(
+ &self.engine,
+ batch_size,
+ num_qubits,
+ encoding_method,
+ 0,
+ None,
+ );
+ let engine = self.engine.clone();
+ let iter = py
+ .detach(|| {
+ qdp_core::PipelineIterator::new_from_file(
+ engine,
+ path_str.as_str(),
+ config,
+ batch_limit,
+ )
+ })
+ .map_err(|e| PyRuntimeError::new_err(format!("create_file_loader
failed: {}", e)))?;
+ Ok(PyQuantumLoader::new(Some(iter)))
}
-}
-
-#[cfg(target_os = "linux")]
-use loader_bindings::{PyQuantumLoader, config_from_args, path_from_py};
-
-/// Quantum Data Plane (QDP) Python module
-///
-/// GPU-accelerated quantum data encoding with DLPack integration.
-#[pymodule]
-fn _qdp(m: &Bound<'_, PyModule>) -> PyResult<()> {
- // Respect RUST_LOG for Rust log output; try_init() is no-op if already
initialized.
- let _ = env_logger::Builder::from_default_env().try_init();
- m.add_class::<QdpEngine>()?;
- m.add_class::<QuantumTensor>()?;
#[cfg(target_os = "linux")]
- m.add_class::<PyQuantumLoader>()?;
- Ok(())
+ /// Create a streaming Parquet pipeline iterator (for
QuantumDataLoader.source_file(path, streaming=True)).
+ #[pyo3(signature = (path, batch_size, num_qubits, encoding_method,
batch_limit=None))]
+ fn create_streaming_file_loader(
+ &self,
+ py: Python<'_>,
+ path: &Bound<'_, PyAny>,
+ batch_size: usize,
+ num_qubits: u32,
+ encoding_method: &str,
+ batch_limit: Option<usize>,
+ ) -> PyResult<PyQuantumLoader> {
+ let path_str = path_from_py(path)?;
+ let batch_limit = batch_limit.unwrap_or(usize::MAX);
+ let config = config_from_args(
+ &self.engine,
+ batch_size,
+ num_qubits,
+ encoding_method,
+ 0,
+ None,
+ );
+ let engine = self.engine.clone();
+ let iter = py
+ .detach(|| {
+ qdp_core::PipelineIterator::new_from_file_streaming(
+ engine,
+ path_str.as_str(),
+ config,
+ batch_limit,
+ )
+ })
+ .map_err(|e| {
+ PyRuntimeError::new_err(format!("create_streaming_file_loader
failed: {}", e))
+ })?;
+ Ok(PyQuantumLoader::new(Some(iter)))
+ }
}
diff --git a/qdp/qdp-python/src/lib.rs b/qdp/qdp-python/src/lib.rs
index 12335cb72..34facaedb 100644
--- a/qdp/qdp-python/src/lib.rs
+++ b/qdp/qdp-python/src/lib.rs
@@ -14,1302 +14,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-use numpy::{PyReadonlyArray1, PyReadonlyArray2, PyUntypedArrayMethods};
-use pyo3::exceptions::PyRuntimeError;
-use pyo3::ffi;
-use pyo3::prelude::*;
-use qdp_core::dlpack::{DL_FLOAT, DLDeviceType, DLManagedTensor};
-use qdp_core::{Precision, QdpEngine as CoreEngine};
-use std::ffi::c_void;
-
-/// Quantum tensor wrapper implementing DLPack protocol
-///
-/// This class wraps a GPU-allocated quantum state vector and implements
-/// the DLPack protocol for zero-copy integration with PyTorch and other
-/// array libraries.
-///
-/// Example:
-/// >>> engine = QdpEngine(device_id=0)
-/// >>> qtensor = engine.encode([1.0, 2.0, 3.0], num_qubits=2,
encoding_method="amplitude")
-/// >>> torch_tensor = torch.from_dlpack(qtensor)
-#[pyclass]
-struct QuantumTensor {
- ptr: *mut DLManagedTensor,
- consumed: bool,
-}
-
-#[pymethods]
-impl QuantumTensor {
- /// Implements DLPack protocol - returns PyCapsule for PyTorch
- ///
- /// This method is called by torch.from_dlpack() to get the GPU memory
pointer.
- /// The capsule can only be consumed once to prevent double-free errors.
- ///
- /// Args:
- /// stream: Optional CUDA stream (DLPack 0.8+; 1=legacy default,
2=per-thread default)
- ///
- /// Returns:
- /// PyCapsule containing DLManagedTensor pointer
- ///
- /// Raises:
- /// RuntimeError: If the tensor has already been consumed
- #[pyo3(signature = (stream=None))]
- fn __dlpack__<'py>(&mut self, py: Python<'py>, stream: Option<i64>) ->
PyResult<Py<PyAny>> {
- if self.consumed {
- return Err(PyRuntimeError::new_err(
- "DLPack tensor already consumed (can only be used once)",
- ));
- }
-
- if self.ptr.is_null() {
- return Err(PyRuntimeError::new_err("Invalid DLPack tensor
pointer"));
- }
-
- if let Some(stream) = stream
- && stream > 0
- {
- let stream_ptr = qdp_core::dlpack::dlpack_stream_to_cuda(stream);
- unsafe {
- qdp_core::dlpack::synchronize_stream(stream_ptr).map_err(|e| {
- PyRuntimeError::new_err(format!("CUDA stream sync failed:
{}", e))
- })?;
- }
- }
-
- // Mark as consumed to prevent double-free
- self.consumed = true;
-
- // Create PyCapsule using FFI
- // PyTorch will call the deleter stored in DLManagedTensor.deleter
- // Use a static C string for the capsule name to avoid lifetime issues
- const DLTENSOR_NAME: &[u8] = b"dltensor\0";
-
- unsafe {
- // Create PyCapsule without a destructor
- // PyTorch will manually call the deleter from DLManagedTensor
- let capsule_ptr = ffi::PyCapsule_New(
- self.ptr as *mut std::ffi::c_void,
- DLTENSOR_NAME.as_ptr() as *const i8,
- None, // No destructor - PyTorch handles it
- );
-
- if capsule_ptr.is_null() {
- return Err(PyRuntimeError::new_err("Failed to create
PyCapsule"));
- }
-
- Ok(Py::from_owned_ptr(py, capsule_ptr))
- }
- }
-
- /// Returns DLPack device information
- ///
- /// Returns:
- /// Tuple of (device_type, device_id) where device_type=2 for CUDA
- fn __dlpack_device__(&self) -> PyResult<(i32, i32)> {
- if self.ptr.is_null() {
- return Err(PyRuntimeError::new_err("Invalid DLPack tensor
pointer"));
- }
-
- unsafe {
- let tensor = &(*self.ptr).dl_tensor;
- // DLPack device_type: kDLCUDA = 2, kDLCPU = 1
- let device_type = match tensor.device.device_type {
- qdp_core::dlpack::DLDeviceType::kDLCUDA => 2,
- qdp_core::dlpack::DLDeviceType::kDLCPU => 1,
- };
- // Read device_id from DLPack tensor metadata
- Ok((device_type, tensor.device.device_id))
- }
- }
-}
+mod dlpack;
+mod engine;
+mod loader;
+mod pytorch;
+mod tensor;
-impl Drop for QuantumTensor {
- fn drop(&mut self) {
- // Only free if not consumed by __dlpack__
- // If consumed, PyTorch/consumer will call the deleter
- if !self.consumed && !self.ptr.is_null() {
- unsafe {
- // Defensive check: qdp-core always provides a deleter
- debug_assert!(
- (*self.ptr).deleter.is_some(),
- "DLManagedTensor from qdp-core should always have a
deleter"
- );
-
- // Call the DLPack deleter to free memory
- if let Some(deleter) = (*self.ptr).deleter {
- deleter(self.ptr);
- }
- }
- }
- }
-}
-
-// Safety: QuantumTensor can be sent between threads
-// The DLManagedTensor pointer management is thread-safe via Arc in the deleter
-unsafe impl Send for QuantumTensor {}
-unsafe impl Sync for QuantumTensor {}
-
-/// Helper to detect PyTorch tensor
-fn is_pytorch_tensor(obj: &Bound<'_, PyAny>) -> PyResult<bool> {
- let type_obj = obj.get_type();
- let name = type_obj.name()?;
- if name != "Tensor" {
- return Ok(false);
- }
- let module = type_obj.module()?;
- let module_name = module.to_str()?;
- Ok(module_name == "torch")
-}
-
-/// Helper to validate CPU tensor
-fn validate_tensor(tensor: &Bound<'_, PyAny>) -> PyResult<()> {
- if !is_pytorch_tensor(tensor)? {
- return Err(PyRuntimeError::new_err("Object is not a PyTorch Tensor"));
- }
-
- let device = tensor.getattr("device")?;
- let device_type: String = device.getattr("type")?.extract()?;
-
- if device_type != "cpu" {
- return Err(PyRuntimeError::new_err(format!(
- "Only CPU tensors are currently supported for this path. Got
device: {}",
- device_type
- )));
- }
-
- Ok(())
-}
-
-/// Check if a PyTorch tensor is on a CUDA device
-fn is_cuda_tensor(tensor: &Bound<'_, PyAny>) -> PyResult<bool> {
- let device = tensor.getattr("device")?;
- let device_type: String = device.getattr("type")?.extract()?;
- Ok(device_type == "cuda")
-}
-
-/// Validate array/tensor shape (must be 1D or 2D)
-///
-/// Args:
-/// ndim: Number of dimensions
-/// context: Context string for error message (e.g., "array", "tensor",
"CUDA tensor")
-///
-/// Returns:
-/// Ok(()) if shape is valid (1D or 2D), otherwise returns an error
-fn validate_shape(ndim: usize, context: &str) -> PyResult<()> {
- match ndim {
- 1 | 2 => Ok(()),
- _ => {
- let item_type = if context.contains("array") {
- "array"
- } else {
- "tensor"
- };
- Err(PyRuntimeError::new_err(format!(
- "Unsupported {} shape: {}D. Expected 1D {} for single sample \
- encoding or 2D {} (batch_size, features) for batch encoding.",
- context, ndim, item_type, item_type
- )))
- }
- }
-}
-
-/// Get the CUDA device index from a PyTorch tensor
-fn get_tensor_device_id(tensor: &Bound<'_, PyAny>) -> PyResult<i32> {
- let device = tensor.getattr("device")?;
- let device_index: i32 = device.getattr("index")?.extract()?;
- Ok(device_index)
-}
-
-/// Get the current CUDA stream pointer for the tensor's device.
-fn get_torch_cuda_stream_ptr(tensor: &Bound<'_, PyAny>) -> PyResult<*mut
c_void> {
- let py = tensor.py();
- let torch = PyModule::import(py, "torch")
- .map_err(|_| PyRuntimeError::new_err("Failed to import torch
module"))?;
- let cuda = torch.getattr("cuda")?;
- let device = tensor.getattr("device")?;
- let stream = cuda.call_method1("current_stream", (device,))?;
-
- // Defensive validation: ensure the stream is a CUDA stream on the same
device
- let stream_device = stream.getattr("device").map_err(|_| {
- PyRuntimeError::new_err("CUDA stream object from PyTorch is missing
'device' attribute")
- })?;
- let stream_device_type: String = stream_device
- .getattr("type")
- .and_then(|obj| obj.extract())
- .map_err(|_| {
- PyRuntimeError::new_err(
- "Failed to extract CUDA stream device type from PyTorch
stream.device",
- )
- })?;
- if stream_device_type != "cuda" {
- return Err(PyRuntimeError::new_err(format!(
- "Expected CUDA stream device type 'cuda', got '{}'",
- stream_device_type
- )));
- }
-
- let stream_device_index: i32 = stream_device
- .getattr("index")
- .and_then(|obj| obj.extract())
- .map_err(|_| {
- PyRuntimeError::new_err(
- "Failed to extract CUDA stream device index from PyTorch
stream.device",
- )
- })?;
- let tensor_device_index = get_tensor_device_id(tensor)?;
- if stream_device_index != tensor_device_index {
- return Err(PyRuntimeError::new_err(format!(
- "CUDA stream device index ({}) does not match tensor device index
({})",
- stream_device_index, tensor_device_index
- )));
- }
-
- let stream_ptr: u64 = stream.getattr("cuda_stream")?.extract()?;
- Ok(if stream_ptr == 0 {
- std::ptr::null_mut()
- } else {
- stream_ptr as *mut c_void
- })
-}
-
-/// Validate a CUDA tensor for direct GPU encoding
-/// Checks: dtype matches encoding method, contiguous, non-empty, device_id
matches engine
-fn validate_cuda_tensor_for_encoding(
- tensor: &Bound<'_, PyAny>,
- expected_device_id: usize,
- encoding_method: &str,
-) -> PyResult<()> {
- let method = encoding_method.to_ascii_lowercase();
-
- // Check encoding method support and dtype (ASCII lowercase for
case-insensitive match).
- let dtype = tensor.getattr("dtype")?;
- let dtype_str: String = dtype.str()?.extract()?;
- let dtype_str_lower = dtype_str.to_ascii_lowercase();
- match method.as_str() {
- "amplitude" => {
- if !(dtype_str_lower.contains("float64") ||
dtype_str_lower.contains("float32")) {
- return Err(PyRuntimeError::new_err(format!(
- "CUDA tensor must have dtype float64 or float32 for
amplitude encoding, got {}. \
- Use tensor.to(torch.float64) or tensor.to(torch.float32)",
- dtype_str
- )));
- }
- }
- "angle" => {
- if !dtype_str_lower.contains("float64") {
- return Err(PyRuntimeError::new_err(format!(
- "CUDA tensor must have dtype float64 for {} encoding, got
{}. \
- Use tensor.to(torch.float64)",
- method, dtype_str
- )));
- }
- }
- "basis" => {
- if !dtype_str_lower.contains("int64") {
- return Err(PyRuntimeError::new_err(format!(
- "CUDA tensor must have dtype int64 for basis encoding, got
{}. \
- Use tensor.to(torch.int64)",
- dtype_str
- )));
- }
- }
- _ => {
- return Err(PyRuntimeError::new_err(format!(
- "CUDA tensor encoding currently only supports 'amplitude',
'angle', or 'basis' methods, got '{}'. \
- Use tensor.cpu() to convert to CPU tensor for other encoding
methods.",
- encoding_method
- )));
- }
- }
-
- // Check contiguous
- let is_contiguous: bool = tensor.call_method0("is_contiguous")?.extract()?;
- if !is_contiguous {
- return Err(PyRuntimeError::new_err(
- "CUDA tensor must be contiguous. Use tensor.contiguous()",
- ));
- }
-
- // Check non-empty
- let numel: usize = tensor.call_method0("numel")?.extract()?;
- if numel == 0 {
- return Err(PyRuntimeError::new_err("CUDA tensor cannot be empty"));
- }
-
- // Check device matches engine
- let tensor_device_id = get_tensor_device_id(tensor)?;
- if tensor_device_id as usize != expected_device_id {
- return Err(PyRuntimeError::new_err(format!(
- "Device mismatch: tensor is on cuda:{}, but engine is on cuda:{}. \
- Move tensor with tensor.to('cuda:{}')",
- tensor_device_id, expected_device_id, expected_device_id
- )));
- }
-
- Ok(())
-}
-
-/// Minimal CUDA tensor metadata extracted via PyTorch APIs.
-struct CudaTensorInfo {
- data_ptr: *const f64,
- shape: Vec<i64>,
-}
-
-/// Extract GPU pointer and shape directly from a PyTorch CUDA tensor.
-///
-/// # Safety
-/// The returned pointer is borrowed from the source tensor. The caller must
-/// ensure the tensor remains alive and unmodified for the duration of use.
-fn extract_cuda_tensor_info(tensor: &Bound<'_, PyAny>) ->
PyResult<CudaTensorInfo> {
- let data_ptr: u64 = tensor.call_method0("data_ptr")?.extract()?;
- if data_ptr == 0 {
- return Err(PyRuntimeError::new_err(
- "PyTorch returned a null data pointer for CUDA tensor",
- ));
- }
-
- let ndim: usize = tensor.call_method0("dim")?.extract()?;
- let mut shape = Vec::with_capacity(ndim);
- for axis in 0..ndim {
- let dim: i64 = tensor.call_method1("size", (axis,))?.extract()?;
- shape.push(dim);
- }
-
- Ok(CudaTensorInfo {
- data_ptr: data_ptr as *const f64,
- shape,
- })
-}
-
-/// DLPack tensor information extracted from a PyCapsule
-///
-/// This struct owns the DLManagedTensor pointer and ensures proper cleanup
-/// via the DLPack deleter when dropped (RAII pattern).
-struct DLPackTensorInfo {
- /// Raw DLManagedTensor pointer from PyTorch DLPack capsule
- /// This is owned by this struct and will be freed via deleter on drop
- managed_ptr: *mut DLManagedTensor,
- /// Data pointer inside dl_tensor (GPU memory, owned by managed_ptr)
- data_ptr: *const c_void,
- shape: Vec<i64>,
- /// CUDA device ID from DLPack metadata.
- /// Used for defensive validation against PyTorch API device ID.
- device_id: i32,
-}
-
-impl Drop for DLPackTensorInfo {
- fn drop(&mut self) {
- unsafe {
- if !self.managed_ptr.is_null() {
- // Per DLPack protocol: consumer must call deleter exactly once
- if let Some(deleter) = (*self.managed_ptr).deleter {
- deleter(self.managed_ptr);
- }
- // Prevent double-free
- self.managed_ptr = std::ptr::null_mut();
- }
- }
- }
-}
-
-/// Extract GPU pointer from PyTorch tensor's __dlpack__() capsule
-///
-/// Uses the DLPack protocol to obtain a zero-copy view of the tensor's GPU
memory.
-/// The returned `DLPackTensorInfo` owns the DLManagedTensor and will
automatically
-/// call the deleter when dropped, ensuring proper resource cleanup.
-///
-/// # Safety
-/// The returned `data_ptr` points to GPU memory owned by the source tensor.
-/// The caller must ensure the source tensor remains alive and unmodified
-/// for the entire duration that `data_ptr` is in use. Python's GIL ensures
-/// the tensor won't be garbage collected during `encode()`, but the caller
-/// must not deallocate or resize the tensor while encoding is in progress.
-fn extract_dlpack_tensor(_py: Python<'_>, tensor: &Bound<'_, PyAny>) ->
PyResult<DLPackTensorInfo> {
- // Call tensor.__dlpack__() to get PyCapsule
- // Note: PyTorch's __dlpack__ uses the default stream when called without
arguments
- let capsule = tensor.call_method0("__dlpack__")?;
-
- const DLTENSOR_NAME: &[u8] = b"dltensor\0";
-
- // SAFETY: capsule is a valid PyCapsule from tensor.__dlpack__().
DLTENSOR_NAME is a
- // null-terminated C string for the lifetime of the call. We only read the
capsule
- // and call PyCapsule_IsValid / PyCapsule_GetPointer; we do not invalidate
the capsule.
- let managed_ptr = unsafe {
- let capsule_ptr = capsule.as_ptr();
- if ffi::PyCapsule_IsValid(capsule_ptr, DLTENSOR_NAME.as_ptr() as
*const i8) == 0 {
- return Err(PyRuntimeError::new_err(
- "Invalid DLPack capsule (expected 'dltensor')",
- ));
- }
- let ptr = ffi::PyCapsule_GetPointer(capsule_ptr,
DLTENSOR_NAME.as_ptr() as *const i8)
- as *mut DLManagedTensor;
- if ptr.is_null() {
- return Err(PyRuntimeError::new_err(
- "Failed to extract DLManagedTensor from PyCapsule",
- ));
- }
- ptr
- };
-
- // SAFETY: managed_ptr is non-null and was returned by
PyCapsule_GetPointer for a valid
- // "dltensor" capsule, so it points to a valid DLManagedTensor. The
capsule (and thus
- // the tensor) is held by the caller for the duration of this function. We
read fields
- // and create slices from shape/strides only when non-null and ndim is
valid.
- unsafe {
- let dl_tensor = &(*managed_ptr).dl_tensor;
-
- if dl_tensor.data.is_null() {
- return Err(PyRuntimeError::new_err(
- "DLPack tensor has null data pointer",
- ));
- }
-
- if dl_tensor.device.device_type != DLDeviceType::kDLCUDA {
- return Err(PyRuntimeError::new_err(
- "DLPack tensor must be on CUDA device",
- ));
- }
-
- if dl_tensor.dtype.code != DL_FLOAT
- || dl_tensor.dtype.bits != 64
- || dl_tensor.dtype.lanes != 1
- {
- return Err(PyRuntimeError::new_err(format!(
- "DLPack tensor must be float64 (code={}, bits={}, lanes={})",
- dl_tensor.dtype.code, dl_tensor.dtype.bits,
dl_tensor.dtype.lanes
- )));
- }
-
- if !dl_tensor
- .byte_offset
- .is_multiple_of(std::mem::size_of::<f64>() as u64)
- {
- return Err(PyRuntimeError::new_err(
- "DLPack tensor byte_offset is not aligned for float64",
- ));
- }
-
- let data_ptr =
- (dl_tensor.data as *const u8).add(dl_tensor.byte_offset as usize)
as *const f64;
-
- let ndim = dl_tensor.ndim as usize;
- // SAFETY: shape pointer is valid for ndim elements when non-null
(DLPack contract).
- let shape = if ndim > 0 && !dl_tensor.shape.is_null() {
- std::slice::from_raw_parts(dl_tensor.shape, ndim).to_vec()
- } else {
- vec![]
- };
-
- if ndim == 0 || shape.is_empty() {
- return Err(PyRuntimeError::new_err(
- "DLPack tensor must have at least 1 dimension",
- ));
- }
-
- if !dl_tensor.strides.is_null() {
- // SAFETY: strides pointer is valid for ndim elements (DLPack
contract).
- let strides = std::slice::from_raw_parts(dl_tensor.strides, ndim);
- match ndim {
- 1 => {
- let expected = 1_i64;
- if strides[0] != expected {
- return Err(PyRuntimeError::new_err(format!(
- "DLPack tensor must be contiguous: stride[0]={},
expected {}",
- strides[0], expected
- )));
- }
- }
- 2 => {
- if shape.len() < 2 {
- return Err(PyRuntimeError::new_err(
- "DLPack tensor must be contiguous (shape len < 2)",
- ));
- }
- let expected_stride_1 = 1_i64;
- let expected_stride_0 = shape[1];
- if strides[1] != expected_stride_1 || strides[0] !=
expected_stride_0 {
- return Err(PyRuntimeError::new_err(format!(
- "DLPack tensor must be contiguous: strides=[{},
{}], expected [{}, {}] (expected[1]=shape[1])",
- strides[0], strides[1], expected_stride_0,
expected_stride_1
- )));
- }
- }
- _ => {
- return Err(PyRuntimeError::new_err(
- "DLPack tensor must be 1D or 2D for encoding",
- ));
- }
- }
- }
-
- let device_id = dl_tensor.device.device_id;
-
- const USED_DLTENSOR_NAME: &[u8] = b"used_dltensor\0";
- // SAFETY: capsule is the same PyCapsule we used above; renaming is
allowed and does not free it.
- ffi::PyCapsule_SetName(capsule.as_ptr(), USED_DLTENSOR_NAME.as_ptr()
as *const i8);
-
- Ok(DLPackTensorInfo {
- managed_ptr,
- data_ptr: data_ptr as *const std::ffi::c_void,
- shape,
- device_id,
- })
- }
-}
-
-/// PyO3 wrapper for QdpEngine
-///
-/// Provides Python bindings for GPU-accelerated quantum state encoding.
-#[pyclass]
-struct QdpEngine {
- engine: CoreEngine,
-}
-
-#[pymethods]
-impl QdpEngine {
- /// Initialize QDP engine on specified GPU device
- ///
- /// Args:
- /// device_id: CUDA device ID (typically 0)
- /// precision: Output precision ("float32" default, or "float64")
- ///
- /// Returns:
- /// QdpEngine instance
- ///
- /// Raises:
- /// RuntimeError: If CUDA device initialization fails
- #[new]
- #[pyo3(signature = (device_id=0, precision="float32"))]
- fn new(device_id: usize, precision: &str) -> PyResult<Self> {
- let precision = match precision.to_ascii_lowercase().as_str() {
- "float32" | "f32" | "float" => Precision::Float32,
- "float64" | "f64" | "double" => Precision::Float64,
- other => {
- return Err(PyRuntimeError::new_err(format!(
- "Unsupported precision '{}'. Use 'float32' (default) or
'float64'.",
- other
- )));
- }
- };
-
- let engine = CoreEngine::new_with_precision(device_id, precision)
- .map_err(|e| PyRuntimeError::new_err(format!("Failed to
initialize: {}", e)))?;
- Ok(Self { engine })
- }
-
- /// Encode classical data into quantum state (auto-detects input type)
- ///
- /// Args:
- /// data: Input data - supports:
- /// - Python list: [1.0, 2.0, 3.0, 4.0]
- /// - NumPy array: 1D (single sample) or 2D (batch) array
- /// - PyTorch tensor: CPU tensor (float64 recommended; will be
copied to GPU)
- /// - String path: .parquet, .arrow, .feather, .npy, .pt, .pth,
.pb file
- /// - pathlib.Path: Path object (converted via os.fspath())
- /// num_qubits: Number of qubits for encoding
- /// encoding_method: Encoding strategy ("amplitude" default, "angle",
or "basis")
- ///
- /// Returns:
- /// QuantumTensor: DLPack-compatible tensor for zero-copy PyTorch
integration
- /// Shape: [batch_size, 2^num_qubits]
- ///
- /// Example:
- /// >>> engine = QdpEngine(0)
- /// >>> # From list
- /// >>> tensor = engine.encode([1.0, 2.0, 3.0, 4.0], 2)
- /// >>> # From NumPy batch
- /// >>> tensor = engine.encode(np.random.randn(64, 4), 2)
- /// >>> # From file path string
- /// >>> tensor = engine.encode("data.parquet", 10)
- /// >>> # From pathlib.Path
- /// >>> from pathlib import Path
- /// >>> tensor = engine.encode(Path("data.npy"), 10)
- #[pyo3(signature = (data, num_qubits, encoding_method="amplitude"))]
- fn encode(
- &self,
- data: &Bound<'_, PyAny>,
- num_qubits: usize,
- encoding_method: &str,
- ) -> PyResult<QuantumTensor> {
- // Check if it's a string path
- if let Ok(path) = data.extract::<String>() {
- return self.encode_from_file(&path, num_qubits, encoding_method);
- }
-
- // Check if it's a pathlib.Path or os.PathLike object (has __fspath__
method)
- if data.hasattr("__fspath__")? {
- let path: String = data.call_method0("__fspath__")?.extract()?;
- return self.encode_from_file(&path, num_qubits, encoding_method);
- }
-
- // Check if it's a NumPy array
- if data.hasattr("__array_interface__")? {
- return self.encode_from_numpy(data, num_qubits, encoding_method);
- }
-
- // Check if it's a PyTorch tensor
- if is_pytorch_tensor(data)? {
- // Check if it's a CUDA tensor - use zero-copy GPU encoding
- if is_cuda_tensor(data)? {
- return self._encode_from_cuda_tensor(data, num_qubits,
encoding_method);
- }
- // CPU PyTorch tensor path
- return self.encode_from_pytorch(data, num_qubits, encoding_method);
- }
-
- // Fallback: try to extract as Vec<f64> (Python list)
- self.encode_from_list(data, num_qubits, encoding_method)
- }
-
- /// Encode from NumPy array (1D or 2D)
- fn encode_from_numpy(
- &self,
- data: &Bound<'_, PyAny>,
- num_qubits: usize,
- encoding_method: &str,
- ) -> PyResult<QuantumTensor> {
- let ndim: usize = data.getattr("ndim")?.extract()?;
- validate_shape(ndim, "array")?;
-
- match ndim {
- 1 => {
- // 1D array: single sample encoding (zero-copy if already
contiguous)
- let array_1d =
data.extract::<PyReadonlyArray1<f64>>().map_err(|_| {
- PyRuntimeError::new_err(
- "Failed to extract 1D NumPy array. Ensure dtype is
float64.",
- )
- })?;
- let data_slice = array_1d.as_slice().map_err(|_| {
- PyRuntimeError::new_err("NumPy array must be contiguous
(C-order)")
- })?;
- let ptr = self
- .engine
- .encode(data_slice, num_qubits, encoding_method)
- .map_err(|e| PyRuntimeError::new_err(format!("Encoding
failed: {}", e)))?;
- Ok(QuantumTensor {
- ptr,
- consumed: false,
- })
- }
- 2 => {
- // 2D array: batch encoding (zero-copy if already contiguous)
- let array_2d =
data.extract::<PyReadonlyArray2<f64>>().map_err(|_| {
- PyRuntimeError::new_err(
- "Failed to extract 2D NumPy array. Ensure dtype is
float64.",
- )
- })?;
- let shape = array_2d.shape();
- let num_samples = shape[0];
- let sample_size = shape[1];
- let data_slice = array_2d.as_slice().map_err(|_| {
- PyRuntimeError::new_err("NumPy array must be contiguous
(C-order)")
- })?;
- let ptr = self
- .engine
- .encode_batch(
- data_slice,
- num_samples,
- sample_size,
- num_qubits,
- encoding_method,
- )
- .map_err(|e| PyRuntimeError::new_err(format!("Encoding
failed: {}", e)))?;
- Ok(QuantumTensor {
- ptr,
- consumed: false,
- })
- }
- _ => unreachable!("validate_shape() should have caught invalid
ndim"),
- }
- }
-
- /// Encode from PyTorch tensor (1D or 2D)
- fn encode_from_pytorch(
- &self,
- data: &Bound<'_, PyAny>,
- num_qubits: usize,
- encoding_method: &str,
- ) -> PyResult<QuantumTensor> {
- // Check if it's a CUDA tensor - use zero-copy GPU encoding via DLPack
- if is_cuda_tensor(data)? {
- // Validate CUDA tensor for direct GPU encoding
- validate_cuda_tensor_for_encoding(
- data,
- self.engine.device().ordinal(),
- encoding_method,
- )?;
-
- // Extract GPU pointer via DLPack (RAII wrapper ensures deleter is
called)
- let dlpack_info = extract_dlpack_tensor(data.py(), data)?;
-
- // ensure PyTorch API and DLPack metadata agree on device ID
- let pytorch_device_id = get_tensor_device_id(data)?;
- if dlpack_info.device_id != pytorch_device_id {
- return Err(PyRuntimeError::new_err(format!(
- "Device ID mismatch: PyTorch reports device {}, but DLPack
metadata reports {}. \
- This indicates an inconsistency between PyTorch and
DLPack device information.",
- pytorch_device_id, dlpack_info.device_id
- )));
- }
-
- let ndim: usize = data.call_method0("dim")?.extract()?;
- validate_shape(ndim, "CUDA tensor")?;
-
- match ndim {
- 1 => {
- // 1D CUDA tensor: single sample encoding
- let input_len = dlpack_info.shape[0] as usize;
- // SAFETY: dlpack_info.data_ptr was validated via DLPack
protocol from a
- // valid PyTorch CUDA tensor. The tensor remains alive
during this call
- // (held by Python's GIL), and we validated
dtype/contiguity/device above.
- // The DLPackTensorInfo RAII wrapper will call deleter
when dropped.
- let ptr = unsafe {
- self.engine
- .encode_from_gpu_ptr(
- dlpack_info.data_ptr,
- input_len,
- num_qubits,
- encoding_method,
- )
- .map_err(|e| {
- PyRuntimeError::new_err(format!("Encoding
failed: {}", e))
- })?
- };
- return Ok(QuantumTensor {
- ptr,
- consumed: false,
- });
- }
- 2 => {
- // 2D CUDA tensor: batch encoding
- let num_samples = dlpack_info.shape[0] as usize;
- let sample_size = dlpack_info.shape[1] as usize;
- // SAFETY: Same as above - pointer from validated DLPack
tensor
- let ptr = unsafe {
- self.engine
- .encode_batch_from_gpu_ptr(
- dlpack_info.data_ptr,
- num_samples,
- sample_size,
- num_qubits,
- encoding_method,
- )
- .map_err(|e| {
- PyRuntimeError::new_err(format!("Encoding
failed: {}", e))
- })?
- };
- return Ok(QuantumTensor {
- ptr,
- consumed: false,
- });
- }
- _ => unreachable!("validate_shape() should have caught invalid
ndim"),
- }
- }
-
- // CPU tensor path
- validate_tensor(data)?;
- // PERF: Avoid Tensor -> Python list -> Vec deep copies.
- //
- // For CPU tensors, `tensor.detach().numpy()` returns a NumPy view
that shares the same
- // underlying memory (zero-copy) when the tensor is C-contiguous. We
can then borrow a
- // `&[f64]` directly via pyo3-numpy.
- let ndim: usize = data.call_method0("dim")?.extract()?;
- validate_shape(ndim, "tensor")?;
- let numpy_view = data
- .call_method0("detach")?
- .call_method0("numpy")
- .map_err(|_| {
- PyRuntimeError::new_err(
- "Failed to convert torch.Tensor to NumPy view. Ensure the
tensor is on CPU \
- and does not require grad (try: tensor =
tensor.detach().cpu())",
- )
- })?;
-
- match ndim {
- 1 => {
- // 1D tensor: single sample encoding
- let array_1d =
numpy_view.extract::<PyReadonlyArray1<f64>>().map_err(|_| {
- PyRuntimeError::new_err(
- "Failed to extract NumPy view as float64 array. Ensure
dtype is float64 \
- (try: tensor = tensor.to(torch.float64))",
- )
- })?;
- let data_slice = array_1d.as_slice().map_err(|_| {
- PyRuntimeError::new_err(
- "Tensor must be contiguous (C-order) to get zero-copy
slice \
- (try: tensor = tensor.contiguous())",
- )
- })?;
- let ptr = self
- .engine
- .encode(data_slice, num_qubits, encoding_method)
- .map_err(|e| PyRuntimeError::new_err(format!("Encoding
failed: {}", e)))?;
- Ok(QuantumTensor {
- ptr,
- consumed: false,
- })
- }
- 2 => {
- // 2D tensor: batch encoding
- let array_2d =
numpy_view.extract::<PyReadonlyArray2<f64>>().map_err(|_| {
- PyRuntimeError::new_err(
- "Failed to extract NumPy view as float64 array. Ensure
dtype is float64 \
- (try: tensor = tensor.to(torch.float64))",
- )
- })?;
- let shape = array_2d.shape();
- let num_samples = shape[0];
- let sample_size = shape[1];
- let data_slice = array_2d.as_slice().map_err(|_| {
- PyRuntimeError::new_err(
- "Tensor must be contiguous (C-order) to get zero-copy
slice \
- (try: tensor = tensor.contiguous())",
- )
- })?;
- let ptr = self
- .engine
- .encode_batch(
- data_slice,
- num_samples,
- sample_size,
- num_qubits,
- encoding_method,
- )
- .map_err(|e| PyRuntimeError::new_err(format!("Encoding
failed: {}", e)))?;
- Ok(QuantumTensor {
- ptr,
- consumed: false,
- })
- }
- _ => unreachable!("validate_shape() should have caught invalid
ndim"),
- }
- }
-
- /// Encode from Python list
- fn encode_from_list(
- &self,
- data: &Bound<'_, PyAny>,
- num_qubits: usize,
- encoding_method: &str,
- ) -> PyResult<QuantumTensor> {
- let vec_data = data.extract::<Vec<f64>>().map_err(|_| {
- PyRuntimeError::new_err(
- "Unsupported data type. Expected: list, NumPy array, PyTorch
tensor, or file path",
- )
- })?;
- let ptr = self
- .engine
- .encode(&vec_data, num_qubits, encoding_method)
- .map_err(|e| PyRuntimeError::new_err(format!("Encoding failed:
{}", e)))?;
- Ok(QuantumTensor {
- ptr,
- consumed: false,
- })
- }
-
- /// Internal helper to encode from file based on extension
- fn encode_from_file(
- &self,
- path: &str,
- num_qubits: usize,
- encoding_method: &str,
- ) -> PyResult<QuantumTensor> {
- let ptr = if path.ends_with(".parquet") {
- self.engine
- .encode_from_parquet(path, num_qubits, encoding_method)
- .map_err(|e| {
- PyRuntimeError::new_err(format!("Encoding from parquet
failed: {}", e))
- })?
- } else if path.ends_with(".arrow") || path.ends_with(".feather") {
- self.engine
- .encode_from_arrow_ipc(path, num_qubits, encoding_method)
- .map_err(|e| {
- PyRuntimeError::new_err(format!("Encoding from Arrow IPC
failed: {}", e))
- })?
- } else if path.ends_with(".npy") {
- self.engine
- .encode_from_numpy(path, num_qubits, encoding_method)
- .map_err(|e| {
- PyRuntimeError::new_err(format!("Encoding from NumPy
failed: {}", e))
- })?
- } else if path.ends_with(".pt") || path.ends_with(".pth") {
- self.engine
- .encode_from_torch(path, num_qubits, encoding_method)
- .map_err(|e| {
- PyRuntimeError::new_err(format!("Encoding from PyTorch
failed: {}", e))
- })?
- } else if path.ends_with(".pb") {
- self.engine
- .encode_from_tensorflow(path, num_qubits, encoding_method)
- .map_err(|e| {
- PyRuntimeError::new_err(format!("Encoding from TensorFlow
failed: {}", e))
- })?
- } else {
- return Err(PyRuntimeError::new_err(format!(
- "Unsupported file format. Expected .parquet, .arrow, .feather,
.npy, .pt, .pth, or .pb, got: {}",
- path
- )));
- };
-
- Ok(QuantumTensor {
- ptr,
- consumed: false,
- })
- }
-
- /// Encode from TensorFlow TensorProto file
- ///
- /// Args:
- /// path: Path to TensorProto file (.pb)
- /// num_qubits: Number of qubits for encoding
- /// encoding_method: Encoding strategy (currently only "amplitude")
- ///
- /// Returns:
- /// QuantumTensor: DLPack tensor containing all encoded states
- ///
- /// Example:
- /// >>> engine = QdpEngine(device_id=0)
- /// >>> batched = engine.encode_from_tensorflow("data.pb", 16,
"amplitude")
- /// >>> torch_tensor = torch.from_dlpack(batched) # Shape: [200,
65536]
- fn encode_from_tensorflow(
- &self,
- path: &str,
- num_qubits: usize,
- encoding_method: &str,
- ) -> PyResult<QuantumTensor> {
- let ptr = self
- .engine
- .encode_from_tensorflow(path, num_qubits, encoding_method)
- .map_err(|e| {
- PyRuntimeError::new_err(format!("Encoding from TensorFlow
failed: {}", e))
- })?;
- Ok(QuantumTensor {
- ptr,
- consumed: false,
- })
- }
-
- // --- Loader factory methods (Linux only) ---
- #[cfg(target_os = "linux")]
- /// Create a synthetic-data pipeline iterator (for
QuantumDataLoader.source_synthetic()).
- #[pyo3(signature = (total_batches, batch_size, num_qubits,
encoding_method, seed=None))]
- fn create_synthetic_loader(
- &self,
- total_batches: usize,
- batch_size: usize,
- num_qubits: u32,
- encoding_method: &str,
- seed: Option<u64>,
- ) -> PyResult<PyQuantumLoader> {
- let config = config_from_args(
- &self.engine,
- batch_size,
- num_qubits,
- encoding_method,
- total_batches,
- seed,
- );
- let iter =
qdp_core::PipelineIterator::new_synthetic(self.engine.clone(), config).map_err(
- |e| PyRuntimeError::new_err(format!("create_synthetic_loader
failed: {}", e)),
- )?;
- Ok(PyQuantumLoader::new(Some(iter)))
- }
-
- #[cfg(target_os = "linux")]
- /// Create a file-backed pipeline iterator (full read then batch; for
QuantumDataLoader.source_file(path)).
- #[pyo3(signature = (path, batch_size, num_qubits, encoding_method,
batch_limit=None))]
- fn create_file_loader(
- &self,
- py: Python<'_>,
- path: &Bound<'_, PyAny>,
- batch_size: usize,
- num_qubits: u32,
- encoding_method: &str,
- batch_limit: Option<usize>,
- ) -> PyResult<PyQuantumLoader> {
- let path_str = path_from_py(path)?;
- let batch_limit = batch_limit.unwrap_or(usize::MAX);
- let config = config_from_args(
- &self.engine,
- batch_size,
- num_qubits,
- encoding_method,
- 0,
- None,
- );
- let engine = self.engine.clone();
- let iter = py
- .detach(|| {
- qdp_core::PipelineIterator::new_from_file(
- engine,
- path_str.as_str(),
- config,
- batch_limit,
- )
- })
- .map_err(|e| PyRuntimeError::new_err(format!("create_file_loader
failed: {}", e)))?;
- Ok(PyQuantumLoader::new(Some(iter)))
- }
-
- #[cfg(target_os = "linux")]
- /// Create a streaming Parquet pipeline iterator (for
QuantumDataLoader.source_file(path, streaming=True)).
- #[pyo3(signature = (path, batch_size, num_qubits, encoding_method,
batch_limit=None))]
- fn create_streaming_file_loader(
- &self,
- py: Python<'_>,
- path: &Bound<'_, PyAny>,
- batch_size: usize,
- num_qubits: u32,
- encoding_method: &str,
- batch_limit: Option<usize>,
- ) -> PyResult<PyQuantumLoader> {
- let path_str = path_from_py(path)?;
- let batch_limit = batch_limit.unwrap_or(usize::MAX);
- let config = config_from_args(
- &self.engine,
- batch_size,
- num_qubits,
- encoding_method,
- 0,
- None,
- );
- let engine = self.engine.clone();
- let iter = py
- .detach(|| {
- qdp_core::PipelineIterator::new_from_file_streaming(
- engine,
- path_str.as_str(),
- config,
- batch_limit,
- )
- })
- .map_err(|e| {
- PyRuntimeError::new_err(format!("create_streaming_file_loader
failed: {}", e))
- })?;
- Ok(PyQuantumLoader::new(Some(iter)))
- }
-
- /// Encode directly from a PyTorch CUDA tensor. Internal helper.
- ///
- /// Dispatches to the core f32 GPU pointer API for 1D float32 amplitude
encoding,
- /// or to the float64/basis GPU pointer APIs for other dtypes and batch
encoding.
- ///
- /// Args:
- /// data: PyTorch CUDA tensor
- /// num_qubits: Number of qubits
- /// encoding_method: Encoding strategy (currently only "amplitude")
- fn _encode_from_cuda_tensor(
- &self,
- data: &Bound<'_, PyAny>,
- num_qubits: usize,
- encoding_method: &str,
- ) -> PyResult<QuantumTensor> {
- // Validate CUDA tensor for direct GPU encoding (shape, contiguity,
device, dtype)
- validate_cuda_tensor_for_encoding(data,
self.engine.device().ordinal(), encoding_method)?;
-
- // Determine dtype for dispatch (float32 vs float64, etc.).
- let dtype = data.getattr("dtype")?;
- let dtype_str: String = dtype.str()?.extract()?;
- let dtype_str_lower = dtype_str.to_ascii_lowercase();
- let is_f32 = dtype_str_lower.contains("float32");
- let method = encoding_method.to_ascii_lowercase();
-
- // Current f32 CUDA path only supports amplitude encoding for 1D
tensors.
- let ndim: usize = data.call_method0("dim")?.extract()?;
-
- if method.as_str() == "amplitude" && is_f32 {
- // NOTE: This f32 fast path intentionally bypasses
`extract_cuda_tensor_info`/DLPack
- // and uses PyTorch's `data_ptr()`/`numel()` directly, after
- // `validate_cuda_tensor_for_encoding` has already enforced
dtype/shape/contiguity/device.
- // If additional validation is added to `extract_cuda_tensor_info`
in the future, it must
- // be mirrored here to keep behavior consistent.
- match ndim {
- 1 => {
- // 1D CUDA tensor, float32 amplitude encoding using core
f32 GPU pointer API.
- let input_len: usize =
data.call_method0("numel")?.extract()?;
- let stream_ptr = get_torch_cuda_stream_ptr(data)?;
- let data_ptr_u64: u64 =
data.call_method0("data_ptr")?.extract()?;
- let data_ptr = data_ptr_u64 as *const f32;
-
- let ptr = unsafe {
- self.engine
- .encode_from_gpu_ptr_f32_with_stream(
- data_ptr, input_len, num_qubits, stream_ptr,
- )
- .map_err(|e| {
- PyRuntimeError::new_err(format!(
- "Encoding failed (float32 amplitude): {}",
- e
- ))
- })?
- };
-
- Ok(QuantumTensor {
- ptr,
- consumed: false,
- })
- }
- 2 => Err(PyRuntimeError::new_err(
- "CUDA float32 batch amplitude encoding is not yet
supported. \
- Use float64 (tensor.to(torch.float64)) or encode samples
individually.",
- )),
- _ => Err(PyRuntimeError::new_err(format!(
- "Unsupported CUDA tensor shape: {}D. Expected 1D tensor
for single \
- sample encoding or 2D tensor (batch_size, features) for
batch encoding.",
- ndim
- ))),
- }
- } else {
- // Existing float64 (and basis/int64) CUDA path using direct GPU
pointer.
- let tensor_info = extract_cuda_tensor_info(data)?;
- let stream_ptr = get_torch_cuda_stream_ptr(data)?;
-
- match ndim {
- 1 => {
- // 1D CUDA tensor: single sample encoding
- let input_len = tensor_info.shape[0] as usize;
- // SAFETY: tensor_info.data_ptr was obtained via PyTorch's
data_ptr() from a
- // valid CUDA tensor. The tensor remains alive during this
call
- // (held by Python's GIL), and we validated
dtype/contiguity/device above.
- let ptr = unsafe {
- self.engine
- .encode_from_gpu_ptr_with_stream(
- tensor_info.data_ptr as *const
std::ffi::c_void,
- input_len,
- num_qubits,
- encoding_method,
- stream_ptr,
- )
- .map_err(|e| {
- PyRuntimeError::new_err(format!("Encoding
failed: {}", e))
- })?
- };
- Ok(QuantumTensor {
- ptr,
- consumed: false,
- })
- }
- 2 => {
- // 2D CUDA tensor: batch encoding
- let num_samples = tensor_info.shape[0] as usize;
- let sample_size = tensor_info.shape[1] as usize;
- // SAFETY: Same as above - pointer from validated PyTorch
CUDA tensor
- let ptr = unsafe {
- self.engine
- .encode_batch_from_gpu_ptr_with_stream(
- tensor_info.data_ptr as *const
std::ffi::c_void,
- num_samples,
- sample_size,
- num_qubits,
- encoding_method,
- stream_ptr,
- )
- .map_err(|e| {
- PyRuntimeError::new_err(format!("Encoding
failed: {}", e))
- })?
- };
- Ok(QuantumTensor {
- ptr,
- consumed: false,
- })
- }
- _ => Err(PyRuntimeError::new_err(format!(
- "Unsupported CUDA tensor shape: {}D. Expected 1D tensor
for single \
- sample encoding or 2D tensor (batch_size, features) for
batch encoding.",
- ndim
- ))),
- }
- }
- }
-}
-
-// --- Loader bindings (Linux only; qdp-core pipeline types only built on
Linux) ---
-#[cfg(target_os = "linux")]
-mod loader_bindings {
- use super::*;
- use pyo3::exceptions::PyStopIteration;
- use qdp_core::{PipelineConfig, PipelineIterator};
-
- /// Rust-backed iterator yielding one QuantumTensor per batch; used by
QuantumDataLoader.
- #[pyclass]
- pub struct PyQuantumLoader {
- inner: Option<PipelineIterator>,
- }
-
- impl PyQuantumLoader {
- pub fn new(inner: Option<PipelineIterator>) -> Self {
- Self { inner }
- }
- }
-
- #[pymethods]
- impl PyQuantumLoader {
- fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
- slf
- }
-
- fn __next__(mut slf: PyRefMut<'_, Self>) -> PyResult<QuantumTensor> {
- let mut iter: PipelineIterator = match slf.inner.take() {
- Some(i) => i,
- None => return Err(PyStopIteration::new_err("")),
- };
- // Call next_batch without releasing GIL (return type *mut
DLManagedTensor is !Send).
- let result = iter.next_batch();
- match result {
- Ok(Some(ptr)) => {
- slf.inner = Some(iter);
- Ok(QuantumTensor {
- ptr,
- consumed: false,
- })
- }
- Ok(None) => {
- // Exhausted; do not put iterator back
- Err(PyStopIteration::new_err(""))
- }
- Err(e) => {
- slf.inner = Some(iter);
- Err(PyRuntimeError::new_err(format!(
- "Pipeline next_batch failed: {}",
- e
- )))
- }
- }
- }
- }
-
- /// Build PipelineConfig from Python args. device_id is 0 (engine does not
expose it); iterator uses engine clone with correct device.
- pub fn config_from_args(
- _engine: &CoreEngine,
- batch_size: usize,
- num_qubits: u32,
- encoding_method: &str,
- total_batches: usize,
- seed: Option<u64>,
- ) -> PipelineConfig {
- PipelineConfig {
- device_id: 0,
- num_qubits,
- batch_size,
- total_batches,
- encoding_method: encoding_method.to_string(),
- seed,
- warmup_batches: 0,
- }
- }
-
- /// Resolve path from Python str or pathlib.Path (__fspath__).
- pub fn path_from_py(path: &Bound<'_, PyAny>) -> PyResult<String> {
- path.extract::<String>().or_else(|_| {
- path.call_method0("__fspath__")
- .and_then(|m| m.extract::<String>())
- })
- }
-}
+use engine::QdpEngine;
+use pyo3::prelude::*;
+use tensor::QuantumTensor;
#[cfg(target_os = "linux")]
-use loader_bindings::{PyQuantumLoader, config_from_args, path_from_py};
+use loader::PyQuantumLoader;
/// Quantum Data Plane (QDP) Python module
///
diff --git a/qdp/qdp-python/src/loader.rs b/qdp/qdp-python/src/loader.rs
new file mode 100644
index 000000000..2806b17af
--- /dev/null
+++ b/qdp/qdp-python/src/loader.rs
@@ -0,0 +1,103 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Loader bindings (Linux only; qdp-core pipeline types only built on Linux)
+#[cfg(target_os = "linux")]
+mod loader_impl {
+ use crate::tensor::QuantumTensor;
+ use pyo3::exceptions::PyRuntimeError;
+ use pyo3::prelude::*;
+ use qdp_core::{PipelineConfig, PipelineIterator, QdpEngine as CoreEngine};
+
+ /// Rust-backed iterator yielding one QuantumTensor per batch; used by
QuantumDataLoader.
+ #[pyclass]
+ pub struct PyQuantumLoader {
+ pub inner: Option<PipelineIterator>,
+ }
+
+ impl PyQuantumLoader {
+ pub fn new(inner: Option<PipelineIterator>) -> Self {
+ Self { inner }
+ }
+ }
+
+ #[pymethods]
+ impl PyQuantumLoader {
+ fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
+ slf
+ }
+
+ fn __next__(mut slf: PyRefMut<'_, Self>) -> PyResult<QuantumTensor> {
+ let mut iter: PipelineIterator = match slf.inner.take() {
+ Some(i) => i,
+ None => return
Err(pyo3::exceptions::PyStopIteration::new_err("")),
+ };
+ // Call next_batch without releasing GIL (return type *mut
DLManagedTensor is !Send).
+ let result = iter.next_batch();
+ match result {
+ Ok(Some(ptr)) => {
+ slf.inner = Some(iter);
+ Ok(QuantumTensor {
+ ptr,
+ consumed: false,
+ })
+ }
+ Ok(None) => {
+ // Exhausted; do not put iterator back
+ Err(pyo3::exceptions::PyStopIteration::new_err(""))
+ }
+ Err(e) => {
+ slf.inner = Some(iter);
+ Err(PyRuntimeError::new_err(format!(
+ "Pipeline next_batch failed: {}",
+ e
+ )))
+ }
+ }
+ }
+ }
+
+ /// Build PipelineConfig from Python args. device_id is 0 (engine does not
expose it); iterator uses engine clone with correct device.
+ pub fn config_from_args(
+ _engine: &CoreEngine,
+ batch_size: usize,
+ num_qubits: u32,
+ encoding_method: &str,
+ total_batches: usize,
+ seed: Option<u64>,
+ ) -> PipelineConfig {
+ PipelineConfig {
+ device_id: 0,
+ num_qubits,
+ batch_size,
+ total_batches,
+ encoding_method: encoding_method.to_string(),
+ seed,
+ warmup_batches: 0,
+ }
+ }
+
+ /// Resolve path from Python str or pathlib.Path (__fspath__).
+ pub fn path_from_py(path: &Bound<'_, PyAny>) -> PyResult<String> {
+ path.extract::<String>().or_else(|_| {
+ path.call_method0("__fspath__")
+ .and_then(|m| m.extract::<String>())
+ })
+ }
+}
+
+#[cfg(target_os = "linux")]
+pub use loader_impl::{PyQuantumLoader, config_from_args, path_from_py};
diff --git a/qdp/qdp-python/src/pytorch.rs b/qdp/qdp-python/src/pytorch.rs
new file mode 100644
index 000000000..cb5c75247
--- /dev/null
+++ b/qdp/qdp-python/src/pytorch.rs
@@ -0,0 +1,251 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use pyo3::exceptions::PyRuntimeError;
+use pyo3::prelude::*;
+use std::ffi::c_void;
+
+/// Helper to detect PyTorch tensor
+pub fn is_pytorch_tensor(obj: &Bound<'_, PyAny>) -> PyResult<bool> {
+ let type_obj = obj.get_type();
+ let name = type_obj.name()?;
+ if name != "Tensor" {
+ return Ok(false);
+ }
+ let module = type_obj.module()?;
+ let module_name = module.to_str()?;
+ Ok(module_name == "torch")
+}
+
+/// Helper to validate CPU tensor
+pub fn validate_tensor(tensor: &Bound<'_, PyAny>) -> PyResult<()> {
+ if !is_pytorch_tensor(tensor)? {
+ return Err(PyRuntimeError::new_err("Object is not a PyTorch Tensor"));
+ }
+
+ let device = tensor.getattr("device")?;
+ let device_type: String = device.getattr("type")?.extract()?;
+
+ if device_type != "cpu" {
+ return Err(PyRuntimeError::new_err(format!(
+ "Only CPU tensors are currently supported for this path. Got
device: {}",
+ device_type
+ )));
+ }
+
+ Ok(())
+}
+
+/// Check if a PyTorch tensor is on a CUDA device
+pub fn is_cuda_tensor(tensor: &Bound<'_, PyAny>) -> PyResult<bool> {
+ let device = tensor.getattr("device")?;
+ let device_type: String = device.getattr("type")?.extract()?;
+ Ok(device_type == "cuda")
+}
+
+/// Validate array/tensor shape (must be 1D or 2D)
+///
+/// Args:
+/// ndim: Number of dimensions
+/// context: Context string for error message (e.g., "array", "tensor",
"CUDA tensor")
+///
+/// Returns:
+/// Ok(()) if shape is valid (1D or 2D), otherwise returns an error
+pub fn validate_shape(ndim: usize, context: &str) -> PyResult<()> {
+ match ndim {
+ 1 | 2 => Ok(()),
+ _ => {
+ let item_type = if context.contains("array") {
+ "array"
+ } else {
+ "tensor"
+ };
+ Err(PyRuntimeError::new_err(format!(
+ "Unsupported {} shape: {}D. Expected 1D {} for single sample \
+ encoding or 2D {} (batch_size, features) for batch encoding.",
+ context, ndim, item_type, item_type
+ )))
+ }
+ }
+}
+
+/// Get the CUDA device index from a PyTorch tensor
+pub fn get_tensor_device_id(tensor: &Bound<'_, PyAny>) -> PyResult<i32> {
+ let device = tensor.getattr("device")?;
+ let device_index: i32 = device.getattr("index")?.extract()?;
+ Ok(device_index)
+}
+
+/// Get the current CUDA stream pointer for the tensor's device.
+pub fn get_torch_cuda_stream_ptr(tensor: &Bound<'_, PyAny>) -> PyResult<*mut
c_void> {
+ let py = tensor.py();
+ let torch = PyModule::import(py, "torch")
+ .map_err(|_| PyRuntimeError::new_err("Failed to import torch
module"))?;
+ let cuda = torch.getattr("cuda")?;
+ let device = tensor.getattr("device")?;
+ let stream = cuda.call_method1("current_stream", (device,))?;
+
+ // Defensive validation: ensure the stream is a CUDA stream on the same
device
+ let stream_device = stream.getattr("device").map_err(|_| {
+ PyRuntimeError::new_err("CUDA stream object from PyTorch is missing
'device' attribute")
+ })?;
+ let stream_device_type: String = stream_device
+ .getattr("type")
+ .and_then(|obj| obj.extract())
+ .map_err(|_| {
+ PyRuntimeError::new_err(
+ "Failed to extract CUDA stream device type from PyTorch
stream.device",
+ )
+ })?;
+ if stream_device_type != "cuda" {
+ return Err(PyRuntimeError::new_err(format!(
+ "Expected CUDA stream device type 'cuda', got '{}'",
+ stream_device_type
+ )));
+ }
+
+ let stream_device_index: i32 = stream_device
+ .getattr("index")
+ .and_then(|obj| obj.extract())
+ .map_err(|_| {
+ PyRuntimeError::new_err(
+ "Failed to extract CUDA stream device index from PyTorch
stream.device",
+ )
+ })?;
+ let tensor_device_index = get_tensor_device_id(tensor)?;
+ if stream_device_index != tensor_device_index {
+ return Err(PyRuntimeError::new_err(format!(
+ "CUDA stream device index ({}) does not match tensor device index
({})",
+ stream_device_index, tensor_device_index
+ )));
+ }
+
+ let stream_ptr: u64 = stream.getattr("cuda_stream")?.extract()?;
+ Ok(if stream_ptr == 0 {
+ std::ptr::null_mut()
+ } else {
+ stream_ptr as *mut c_void
+ })
+}
+
+/// Validate a CUDA tensor for direct GPU encoding
+/// Checks: dtype matches encoding method, contiguous, non-empty, device_id
matches engine
+pub fn validate_cuda_tensor_for_encoding(
+ tensor: &Bound<'_, PyAny>,
+ expected_device_id: usize,
+ encoding_method: &str,
+) -> PyResult<()> {
+ let method = encoding_method.to_ascii_lowercase();
+
+ // Check encoding method support and dtype (ASCII lowercase for
case-insensitive match).
+ let dtype = tensor.getattr("dtype")?;
+ let dtype_str: String = dtype.str()?.extract()?;
+ let dtype_str_lower = dtype_str.to_ascii_lowercase();
+ match method.as_str() {
+ "amplitude" => {
+ if !(dtype_str_lower.contains("float64") ||
dtype_str_lower.contains("float32")) {
+ return Err(PyRuntimeError::new_err(format!(
+ "CUDA tensor must have dtype float64 or float32 for
amplitude encoding, got {}. \
+ Use tensor.to(torch.float64) or tensor.to(torch.float32)",
+ dtype_str
+ )));
+ }
+ }
+ "angle" => {
+ if !dtype_str_lower.contains("float64") {
+ return Err(PyRuntimeError::new_err(format!(
+ "CUDA tensor must have dtype float64 for angle encoding,
got {}. \
+ Use tensor.to(torch.float64)",
+ dtype_str
+ )));
+ }
+ }
+ "basis" => {
+ if !dtype_str_lower.contains("int64") {
+ return Err(PyRuntimeError::new_err(format!(
+ "CUDA tensor must have dtype int64 for basis encoding, got
{}. \
+ Use tensor.to(torch.int64)",
+ dtype_str
+ )));
+ }
+ }
+ _ => {
+ return Err(PyRuntimeError::new_err(format!(
+ "CUDA tensor encoding currently only supports 'amplitude',
'angle', or 'basis' methods, got '{}'. \
+ Use tensor.cpu() to convert to CPU tensor for other encoding
methods.",
+ encoding_method
+ )));
+ }
+ }
+
+ // Check contiguous
+ let is_contiguous: bool = tensor.call_method0("is_contiguous")?.extract()?;
+ if !is_contiguous {
+ return Err(PyRuntimeError::new_err(
+ "CUDA tensor must be contiguous. Use tensor.contiguous()",
+ ));
+ }
+
+ // Check non-empty
+ let numel: usize = tensor.call_method0("numel")?.extract()?;
+ if numel == 0 {
+ return Err(PyRuntimeError::new_err("CUDA tensor cannot be empty"));
+ }
+
+ // Check device matches engine
+ let tensor_device_id = get_tensor_device_id(tensor)?;
+ if tensor_device_id as usize != expected_device_id {
+ return Err(PyRuntimeError::new_err(format!(
+ "Device mismatch: tensor is on cuda:{}, but engine is on cuda:{}. \
+ Move tensor with tensor.to('cuda:{}')",
+ tensor_device_id, expected_device_id, expected_device_id
+ )));
+ }
+
+ Ok(())
+}
+
/// Minimal CUDA tensor metadata extracted via PyTorch APIs.
pub struct CudaTensorInfo {
    // Borrowed device pointer to the tensor's first element, obtained from
    // torch's data_ptr(). Typed *const f64 here; callers cast as needed
    // (presumably the float64 encoding path — TODO confirm against callers).
    // Valid only while the source tensor is alive and unmodified.
    pub data_ptr: *const f64,
    // Per-dimension sizes as reported by tensor.size(axis).
    pub shape: Vec<i64>,
}
+
+/// Extract GPU pointer and shape directly from a PyTorch CUDA tensor.
+///
+/// # Safety
+/// The returned pointer is borrowed from the source tensor. The caller must
+/// ensure the tensor remains alive and unmodified for the duration of use.
+pub fn extract_cuda_tensor_info(tensor: &Bound<'_, PyAny>) ->
PyResult<CudaTensorInfo> {
+ let data_ptr: u64 = tensor.call_method0("data_ptr")?.extract()?;
+ if data_ptr == 0 {
+ return Err(PyRuntimeError::new_err(
+ "PyTorch returned a null data pointer for CUDA tensor",
+ ));
+ }
+
+ let ndim: usize = tensor.call_method0("dim")?.extract()?;
+ let mut shape = Vec::with_capacity(ndim);
+ for axis in 0..ndim {
+ let dim: i64 = tensor.call_method1("size", (axis,))?.extract()?;
+ shape.push(dim);
+ }
+
+ Ok(CudaTensorInfo {
+ data_ptr: data_ptr as *const f64,
+ shape,
+ })
+}
diff --git a/qdp/qdp-python/src/tensor.rs b/qdp/qdp-python/src/tensor.rs
new file mode 100644
index 000000000..ab341d1ac
--- /dev/null
+++ b/qdp/qdp-python/src/tensor.rs
@@ -0,0 +1,147 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use pyo3::exceptions::PyRuntimeError;
+use pyo3::ffi;
+use pyo3::prelude::*;
+use qdp_core::dlpack::DLManagedTensor;
+
/// Quantum tensor wrapper implementing DLPack protocol
///
/// This class wraps a GPU-allocated quantum state vector and implements
/// the DLPack protocol for zero-copy integration with PyTorch and other
/// array libraries.
///
/// Example:
///     >>> engine = QdpEngine(device_id=0)
///     >>> qtensor = engine.encode([1.0, 2.0, 3.0], num_qubits=2, encoding_method="amplitude")
///     >>> torch_tensor = torch.from_dlpack(qtensor)
#[pyclass]
pub struct QuantumTensor {
    // Owned DLManagedTensor produced by qdp-core; freed via its deleter,
    // either by the DLPack consumer or by this wrapper's Drop impl.
    pub ptr: *mut DLManagedTensor,
    // Set once __dlpack__ hands ownership to a consumer; prevents Drop from
    // double-freeing the tensor.
    pub consumed: bool,
}
+
+#[pymethods]
+impl QuantumTensor {
+ /// Implements DLPack protocol - returns PyCapsule for PyTorch
+ ///
+ /// This method is called by torch.from_dlpack() to get the GPU memory
pointer.
+ /// The capsule can only be consumed once to prevent double-free errors.
+ ///
+ /// Args:
+ /// stream: Optional CUDA stream (DLPack 0.8+; 1=legacy default,
2=per-thread default)
+ ///
+ /// Returns:
+ /// PyCapsule containing DLManagedTensor pointer
+ ///
+ /// Raises:
+ /// RuntimeError: If the tensor has already been consumed
+ #[pyo3(signature = (stream=None))]
+ fn __dlpack__<'py>(&mut self, py: Python<'py>, stream: Option<i64>) ->
PyResult<Py<PyAny>> {
+ if self.consumed {
+ return Err(PyRuntimeError::new_err(
+ "DLPack tensor already consumed (can only be used once)",
+ ));
+ }
+
+ if self.ptr.is_null() {
+ return Err(PyRuntimeError::new_err("Invalid DLPack tensor
pointer"));
+ }
+
+ if let Some(stream) = stream
+ && stream > 0
+ {
+ let stream_ptr = qdp_core::dlpack::dlpack_stream_to_cuda(stream);
+ unsafe {
+ qdp_core::dlpack::synchronize_stream(stream_ptr).map_err(|e| {
+ PyRuntimeError::new_err(format!("CUDA stream sync failed:
{}", e))
+ })?;
+ }
+ }
+
+ // Mark as consumed to prevent double-free
+ self.consumed = true;
+
+ // Create PyCapsule using FFI
+ // PyTorch will call the deleter stored in DLManagedTensor.deleter
+ // Use a static C string for the capsule name to avoid lifetime issues
+ const DLTENSOR_NAME: &[u8] = b"dltensor\0";
+
+ unsafe {
+ // Create PyCapsule without a destructor
+ // PyTorch will manually call the deleter from DLManagedTensor
+ let capsule_ptr = ffi::PyCapsule_New(
+ self.ptr as *mut std::ffi::c_void,
+ DLTENSOR_NAME.as_ptr() as *const i8,
+ None, // No destructor - PyTorch handles it
+ );
+
+ if capsule_ptr.is_null() {
+ return Err(PyRuntimeError::new_err("Failed to create
PyCapsule"));
+ }
+
+ Ok(Py::from_owned_ptr(py, capsule_ptr))
+ }
+ }
+
+ /// Returns DLPack device information
+ ///
+ /// Returns:
+ /// Tuple of (device_type, device_id) where device_type=2 for CUDA
+ fn __dlpack_device__(&self) -> PyResult<(i32, i32)> {
+ if self.ptr.is_null() {
+ return Err(PyRuntimeError::new_err("Invalid DLPack tensor
pointer"));
+ }
+
+ unsafe {
+ let tensor = &(*self.ptr).dl_tensor;
+ // DLPack device_type: kDLCUDA = 2, kDLCPU = 1
+ let device_type = match tensor.device.device_type {
+ qdp_core::dlpack::DLDeviceType::kDLCUDA => 2,
+ qdp_core::dlpack::DLDeviceType::kDLCPU => 1,
+ };
+ // Read device_id from DLPack tensor metadata
+ Ok((device_type, tensor.device.device_id))
+ }
+ }
+}
+
+impl Drop for QuantumTensor {
+ fn drop(&mut self) {
+ // Only free if not consumed by __dlpack__
+ // If consumed, PyTorch/consumer will call the deleter
+ if !self.consumed && !self.ptr.is_null() {
+ unsafe {
+ // Defensive check: qdp-core always provides a deleter
+ debug_assert!(
+ (*self.ptr).deleter.is_some(),
+ "DLManagedTensor from qdp-core should always have a
deleter"
+ );
+
+ // Call the DLPack deleter to free memory
+ if let Some(deleter) = (*self.ptr).deleter {
+ deleter(self.ptr);
+ }
+ }
+ }
+ }
+}
+
// Safety: QuantumTensor can be sent between threads
// The DLManagedTensor pointer management is thread-safe via Arc in the deleter
// NOTE(review): Sync additionally asserts that concurrent &self access (only
// __dlpack_device__, which reads immutable device metadata) is race-free —
// confirm against qdp-core's deleter/ownership model.
unsafe impl Send for QuantumTensor {}
unsafe impl Sync for QuantumTensor {}