This is an automated email from the ASF dual-hosted git repository.

400Ping pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git


The following commit(s) were added to refs/heads/main by this push:
     new 599a2616f docs: refresh QDP Python API docstrings (#1335)
599a2616f is described below

commit 599a2616f7e1902468b5ef04c41abe955e564f21
Author: Vic Wen <[email protected]>
AuthorDate: Wed Jun 3 09:47:35 2026 +0800

    docs: refresh QDP Python API docstrings (#1335)
---
 qdp/qdp-python/qumat_qdp/api.py        |  61 +++++++++++++++--
 qdp/qdp-python/qumat_qdp/backend.py    |  17 ++++-
 qdp/qdp-python/qumat_qdp/loader.py     | 119 ++++++++++++++++++++++++++++-----
 qdp/qdp-python/qumat_qdp/tensor.py     |  15 ++++-
 qdp/qdp-python/qumat_qdp/triton_amd.py |  20 +++++-
 5 files changed, 206 insertions(+), 26 deletions(-)

diff --git a/qdp/qdp-python/qumat_qdp/api.py b/qdp/qdp-python/qumat_qdp/api.py
index c593e5d41..a872a5547 100644
--- a/qdp/qdp-python/qumat_qdp/api.py
+++ b/qdp/qdp-python/qumat_qdp/api.py
@@ -38,7 +38,12 @@ from typing import Any
 
 @dataclass
 class ThroughputResult:
-    """Result of run_throughput(): duration and vectors per second."""
+    """Throughput benchmark measurement.
+
+    Returned by :meth:`QdpBenchmark.run_throughput`.  ``duration_sec`` is the
+    measured timed section after any configured warmup batches.  ``vectors_per_sec``
+    is computed over ``total_batches * batch_size`` encoded input vectors.
+    """
 
     duration_sec: float
     vectors_per_sec: float
@@ -46,7 +51,12 @@ class ThroughputResult:
 
 @dataclass
 class LatencyResult:
-    """Result of run_latency(): duration and ms per vector."""
+    """Latency benchmark measurement.
+
+    Returned by :meth:`QdpBenchmark.run_latency`.  ``duration_sec`` is the same
+    timed interval used for throughput, and ``latency_ms_per_vector`` is the
+    average milliseconds per encoded input vector across the measured batches.
+    """
 
     duration_sec: float
     latency_ms_per_vector: float
@@ -132,7 +142,15 @@ class QdpBenchmark:
         return self
 
     def prefetch(self, n: int) -> QdpBenchmark:
-        """No-op for API compatibility; Rust pipeline does not use prefetch from Python."""
+        """Accept a prefetch setting for fluent API compatibility.
+
+        The current Rust benchmark pipeline manages work internally and the
+        PyTorch reference path does not use a Python-side prefetch queue, so
+        ``n`` is intentionally ignored.
+
+        :param n: Requested prefetch depth; currently unused.
+        :returns: ``self`` for fluent builder chaining.
+        """
         return self
 
     def warmup(self, n: int) -> QdpBenchmark:
@@ -145,7 +163,17 @@ class QdpBenchmark:
         return self
 
     def backend(self, name: str) -> QdpBenchmark:
-        """Set benchmark backend: ``'rust'`` or ``'pytorch'``."""
+        """Select the benchmark execution backend.
+
+        ``"rust"`` (the default) uses the native optimized pipeline exposed by
+        the ``_qdp`` extension and raises at run time if that extension or entry
+        point is unavailable.  ``"pytorch"`` uses the pure-PyTorch reference
+        implementation on the selected CUDA device when usable, otherwise CPU.
+
+        :param name: Backend name, either ``"rust"`` or ``"pytorch"``.
+        :returns: ``self`` for fluent builder chaining.
+        :raises ValueError: If ``name`` is not a supported backend.
+        """
         if name not in ("rust", "pytorch"):
             raise ValueError(f"backend must be 'rust' or 'pytorch', got {name!r}")
         self._backend_name = name
@@ -158,14 +186,35 @@ class QdpBenchmark:
             )
 
     def run_throughput(self) -> ThroughputResult:
-        """Run throughput benchmark using the selected backend."""
+        """Run the configured throughput benchmark.
+
+        ``qubits()`` and ``batches()`` must be configured before calling this
+        method.  The default ``"rust"`` backend calls the native ``_qdp``
+        pipeline with any configured warmup batches; ``"pytorch"`` runs the
+        reference encoder loop and synchronizes CUDA timing when applicable.
+
+        :returns: A :class:`ThroughputResult` containing elapsed seconds and
+            encoded vectors per second.
+        :raises ValueError: If required benchmark parameters are missing.
+        :raises RuntimeError: If the Rust backend is selected but unavailable.
+        """
         self._validate()
         if self._backend_name == "pytorch":
             return self._run_throughput_pytorch()
         return self._run_throughput_rust()
 
     def run_latency(self) -> LatencyResult:
-        """Run latency benchmark using the selected backend."""
+        """Run the configured latency benchmark.
+
+        ``qubits()`` and ``batches()`` must be configured before calling this
+        method.  The Rust backend reports latency from the native pipeline; the
+        PyTorch backend derives average latency from its throughput run.
+
+        :returns: A :class:`LatencyResult` containing elapsed seconds and mean
+            milliseconds per encoded vector.
+        :raises ValueError: If required benchmark parameters are missing.
+        :raises RuntimeError: If the Rust backend is selected but unavailable.
+        """
         self._validate()
         if self._backend_name == "pytorch":
             return self._run_latency_pytorch()
diff --git a/qdp/qdp-python/qumat_qdp/backend.py b/qdp/qdp-python/qumat_qdp/backend.py
index 098662d88..42f105172 100644
--- a/qdp/qdp-python/qumat_qdp/backend.py
+++ b/qdp/qdp-python/qumat_qdp/backend.py
@@ -70,7 +70,22 @@ def _select_engine_adapter(
 
 
 class QdpEngine:
-    """Unified Python facade over the CUDA and Triton engine routes."""
+    """Select and delegate to a native QDP encoding backend.
+
+    ``QdpEngine`` is the small public Python facade used by callers that want
+    explicit backend selection.  ``backend="cuda"`` routes to the Rust/CUDA
+    extension-backed engine.  ``backend="amd"`` and ``backend="triton_amd"``
+    route to the AMD/Triton implementation.  The selected backend is exposed as
+    ``self.backend`` (``"cuda"`` or ``"amd"``) and all ``encode()`` calls are
+    forwarded to that engine.
+
+    :param device_id: GPU device ordinal to use.
+    :param precision: Numeric precision requested from the backend, such as
+        ``"float32"`` or ``"float64"`` when supported by that backend.
+    :param backend: Backend selector.  Valid values are ``"cuda"``, ``"amd"``,
+        and ``"triton_amd"``.
+    :raises ValueError: If ``backend`` is not one of the supported selectors.
+    """
 
     def __init__(
         self,
diff --git a/qdp/qdp-python/qumat_qdp/loader.py b/qdp/qdp-python/qumat_qdp/loader.py
index 34fae6a1a..8f05affe7 100644
--- a/qdp/qdp-python/qumat_qdp/loader.py
+++ b/qdp/qdp-python/qumat_qdp/loader.py
@@ -252,10 +252,14 @@ def _sample_dim(num_qubits: int, encoding_method: str) -> int:
 
 class QuantumDataLoader:
     """
-    Builder for a synthetic-data quantum encoding iterator.
-
-    Yields one QuantumTensor (batch) per iteration. All encoding runs in Rust;
-    __iter__ returns the Rust-backed iterator from create_synthetic_loader.
+    Builder for batched QDP encoding iterators.
+
+    ``QuantumDataLoader`` can generate synthetic input samples or read supported
+    file formats, then encode each batch with the selected backend.  The default
+    ``"rust"`` backend returns Rust-backed ``QuantumTensor`` batches, while the
+    explicit ``"pytorch"`` backend returns ``torch.Tensor`` batches.  The
+    ``"auto"`` backend tries the Rust extension first and falls back to PyTorch
+    when the native extension is unavailable.
     """
 
     def __init__(
@@ -291,14 +295,37 @@ class QuantumDataLoader:
         self._backend_name: str = _BACKEND_RUST
 
     def qubits(self, n: int) -> QuantumDataLoader:
-        """Set number of qubits. Returns self for chaining."""
+        """Set the number of qubits used by subsequent encodings.
+
+        ``n`` must be a positive integer.  The value controls the encoded state
+        size (for example, amplitude and phase-style encodings produce vectors
+        of length ``2**n``) and the expected input width for encodings such as
+        ``"angle"`` and ``"iqp-z"``.
+
+        :param n: Positive qubit count.
+        :returns: ``self`` for fluent builder chaining.
+        :raises ValueError: If ``n`` is not a positive integer.
+        """
         if not isinstance(n, int) or n < 1:
             raise ValueError(f"num_qubits must be a positive integer, got {n!r}")
         self._num_qubits = n
         return self
 
     def encoding(self, method: str) -> QuantumDataLoader:
-        """Set encoding method (e.g. 'amplitude', 'angle', 'basis'). Returns self."""
+        """Set the quantum feature encoding method.
+
+        Valid values are ``"amplitude"``, ``"angle"``, ``"basis"``,
+        ``"iqp"``, ``"iqp-z"``, and ``"phase"``.  Use these canonical
+        lowercase names because the selected backend receives the string exactly
+        as supplied.  The PyTorch reference backend supports the same methods as
+        :mod:`qumat_qdp.torch_ref`; use the native backend for methods that are
+        not available in the reference path.
+
+        :param method: Encoding method name.
+        :returns: ``self`` for fluent builder chaining.
+        :raises ValueError: If ``method`` is empty, not a string, or not a
+            supported encoding.
+        """
         if not method or not isinstance(method, str):
             raise ValueError(
                 f"encoding_method must be a non-empty string, got {method!r}"
@@ -312,7 +339,19 @@ class QuantumDataLoader:
         return self
 
     def batches(self, total: int, size: int = 64) -> QuantumDataLoader:
-        """Set total number of batches and batch size. Returns self."""
+        """Set the number of batches to produce and samples per batch.
+
+        Both ``total`` and ``size`` must be positive integers.  For synthetic
+        sources, ``total`` is the exact number of generated batches.  For file
+        sources handled by the PyTorch fallback, iteration stops at the smaller
+        of ``total`` and the number of complete/partial batches available from
+        the loaded file.
+
+        :param total: Positive maximum number of batches to emit.
+        :param size: Positive number of samples per encoded batch.
+        :returns: ``self`` for fluent builder chaining.
+        :raises ValueError: If either argument is not a positive integer.
+        """
         if not isinstance(total, int) or total < 1:
             raise ValueError(f"total_batches must be a positive integer, got {total!r}")
         if not isinstance(size, int) or size < 1:
@@ -325,7 +364,22 @@ class QuantumDataLoader:
         self,
         total_batches: int | None = None,
     ) -> QuantumDataLoader:
-        """Use synthetic data source (default). Optionally override total_batches. Returns self."""
+        """Select the synthetic data source.
+
+        Synthetic data is the default when no file source is configured, but
+        calling this method records the source choice explicitly.  Use
+        ``seed()`` to make generated samples reproducible where the selected
+        backend supports seeded generation.  If ``total_batches`` is provided,
+        it overrides the current batch count and must be a positive integer.
+        Selecting both ``source_synthetic()`` and ``source_file()`` on the same
+        loader is rejected when iteration starts.
+
+        :param total_batches: Optional positive replacement for the configured
+            number of batches.
+        :returns: ``self`` for fluent builder chaining.
+        :raises ValueError: If ``total_batches`` is provided but is not a
+            positive integer.
+        """
         self._synthetic_requested = True
         if total_batches is not None:
             if not isinstance(total_batches, int) or total_batches < 1:
@@ -336,12 +390,21 @@ class QuantumDataLoader:
         return self
 
     def source_file(self, path: str, streaming: bool = False) -> QuantumDataLoader:
-        """Use file data source. Path must point to a supported format. Returns self.
-
-        For streaming=True (Phase 2b), only .parquet is supported; data is read in chunks to reduce memory.
-        For streaming=False, supports .parquet, .arrow, .feather, .ipc, .npy, .pt, .pth, .pb.
-        Remote paths (s3://, gs://) are supported when the remote-io feature is enabled.
-        Remote URL query/fragment (for example ?versionId=... or #...) is not supported.
+        """Use a file data source.
+
+        Non-streaming native loading accepts ``.parquet``, ``.arrow``,
+        ``.feather``, ``.ipc``, ``.npy``, ``.pt``, ``.pth``, and ``.pb`` files.
+        The PyTorch fallback path supports only ``.npy``, ``.pt``, and ``.pth``
+        inputs because it loads the full tensor into memory before encoding.
+        Streaming mode is native-only and currently accepts ``.parquet`` files.
+        Remote ``s3://`` and ``gs://`` paths are accepted when the native remote
+        I/O feature is enabled; remote query strings and fragments are rejected.
+
+        :param path: Local or supported remote input path.
+        :param streaming: Whether to request native streaming file loading.
+        :returns: ``self`` for fluent builder chaining.
+        :raises ValueError: If ``path`` is empty, includes an unsupported remote
+            query/fragment, or requests streaming for an unsupported extension.
         """
         if not path or not isinstance(path, str):
             raise ValueError(f"path must be a non-empty string, got {path!r}")
@@ -363,7 +426,17 @@ class QuantumDataLoader:
         return self
 
     def seed(self, s: int | None = None) -> QuantumDataLoader:
-        """Set RNG seed for reproducible synthetic data (must fit Rust u64: 0 <= seed <= 2^64-1). Returns self."""
+        """Set or clear the synthetic data seed.
+
+        ``None`` leaves the loader unseeded for the native Rust path and maps to
+        the PyTorch reference path's default deterministic seed.  Integer seeds
+        must fit Rust ``u64`` so the same configuration can be passed to the
+        native backend.
+
+        :param s: ``None`` or an integer in ``[0, 2**64 - 1]``.
+        :returns: ``self`` for fluent builder chaining.
+        :raises ValueError: If ``s`` is not ``None`` or a valid Rust ``u64``.
+        """
         if s is not None:
             if not isinstance(s, int):
                 raise ValueError(
@@ -377,7 +450,19 @@ class QuantumDataLoader:
         return self
 
     def null_handling(self, policy: str) -> QuantumDataLoader:
-        """Set null handling policy ('fill_zero' or 'reject'). Returns self for chaining."""
+        """Set how nullable file inputs are handled by the native loader.
+
+        Valid policies are ``"fill_zero"`` (replace nulls with zero before
+        encoding) and ``"reject"`` (fail on null input).  The policy is passed
+        through to Rust file and synthetic loader creation when available.  The
+        PyTorch fallback loaders do not consume this setting because supported
+        ``.npy``/``.pt``/``.pth`` inputs are loaded as dense tensors.
+
+        :param policy: Null handling policy, either ``"fill_zero"`` or
+            ``"reject"``.
+        :returns: ``self`` for fluent builder chaining.
+        :raises ValueError: If ``policy`` is not supported.
+        """
         if policy not in ("fill_zero", "reject"):
             raise ValueError(
                 f"null_handling must be 'fill_zero' or 'reject', got {policy!r}"
@@ -618,7 +703,7 @@ class QuantumDataLoader:
                        .as_torch_dataset())
             loader = torch.utils.data.DataLoader(dataset, batch_size=None, num_workers=0)
             for batch in loader:
-                ...  # batch is torch.Tensor, shape (64, 2*2^16)
+                ...  # batch is torch.Tensor, shape (64, 2**16)
 
         Note: ``batch_size=None`` in DataLoader disables DataLoader's own batching;
         ``num_workers=0`` is required because the Rust backend holds GPU state that
diff --git a/qdp/qdp-python/qumat_qdp/tensor.py b/qdp/qdp-python/qumat_qdp/tensor.py
index c9d651a73..bdabf931b 100644
--- a/qdp/qdp-python/qumat_qdp/tensor.py
+++ b/qdp/qdp-python/qumat_qdp/tensor.py
@@ -24,7 +24,20 @@ from typing import Any
 
 @dataclass
 class QdpTensor:
-    """Thin DLPack facade over backend-native tensor producers."""
+    """DLPack-compatible wrapper for backend-native QDP tensor results.
+
+    The Rust/CUDA path and other native backends may return objects whose
+    concrete tensor type is backend-specific.  ``QdpTensor`` preserves that
+    object in ``value`` while exposing ``__dlpack__`` and ``__dlpack_device__``
+    so consumers such as PyTorch can import it without a copy.
+
+    :param value: Backend-native tensor-like object.  It must implement the
+        DLPack protocol when converted with ``to_torch()`` or
+        ``torch.from_dlpack``.
+    :param backend: Human-readable backend name used in error messages.
+    :raises RuntimeError: If ``value`` does not implement the required DLPack
+        methods when conversion is attempted.
+    """
 
     value: Any
     backend: str
diff --git a/qdp/qdp-python/qumat_qdp/triton_amd.py b/qdp/qdp-python/qumat_qdp/triton_amd.py
index 678fb49a5..1fbd84026 100644
--- a/qdp/qdp-python/qumat_qdp/triton_amd.py
+++ b/qdp/qdp-python/qumat_qdp/triton_amd.py
@@ -128,7 +128,25 @@ _IQP_PAIR_MATRIX_MAX_N = 20
 
 @dataclass
 class TritonAmdEngine:
-    """AMD backend implementing amplitude/angle/basis/iqp/iqp-z/phase encoders."""
+    """ROCm/Triton implementation of the QDP encoder interface.
+
+    This engine targets AMD GPUs through a PyTorch ROCm runtime plus the Triton
+    Python package.  ``encode()`` accepts ``"amplitude"``, ``"angle"``,
+    ``"basis"``, ``"iqp"``, ``"iqp-z"``, and ``"phase"``.  The phase encoder
+    uses a fused Triton HIP kernel for ``float32`` and ``1 <= num_qubits <= 32``;
+    other supported cases fall back to vectorized PyTorch operations on the same
+    ROCm device.
+
+    ``precision`` accepts ``"float32"``/``"f32"``/``"float"`` and
+    ``"float64"``/``"f64"``/``"double"``.  Runtime availability is checked when
+    ``encode()`` is called and raises a descriptive ``RuntimeError`` if PyTorch
+    ROCm or Triton is unavailable.
+
+    :param device_id: ROCm device ordinal, addressed through PyTorch as
+        ``cuda:{device_id}``.
+    :param precision: Floating-point precision for real inputs and complex
+        outputs.
+    """
 
     device_id: int = 0
     precision: str = "float32"

Reply via email to