This is an automated email from the ASF dual-hosted git repository.
hcr pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git
The following commit(s) were added to refs/heads/main by this push:
new 38186376a feat: add MNIST amplitude encoding benchmarks for PennyLane
and QDP p… (#1161)
38186376a is described below
commit 38186376ac0a3cd4e689b1be1b33725f773fdcc0
Author: Ryan Huang <[email protected]>
AuthorDate: Tue Mar 10 15:51:56 2026 +0800
feat: add MNIST amplitude encoding benchmarks for PennyLane and QDP p…
(#1161)
* feat: add MNIST amplitude encoding benchmarks for PennyLane and QDP
pipelines
* feat: add timing for QDP encoding in MNIST amplitude pipeline
---
.../benchmark/encoding_benchmarks/README.md | 52 +++
.../pennylane_baseline/mnist_amplitude.py | 362 ++++++++++++++++++
.../qdp_pipeline/mnist_amplitude.py | 423 +++++++++++++++++++++
uv.lock | 4 -
4 files changed, 837 insertions(+), 4 deletions(-)
diff --git a/qdp/qdp-python/benchmark/encoding_benchmarks/README.md
b/qdp/qdp-python/benchmark/encoding_benchmarks/README.md
index 97c70ab36..bf48910bb 100644
--- a/qdp/qdp-python/benchmark/encoding_benchmarks/README.md
+++ b/qdp/qdp-python/benchmark/encoding_benchmarks/README.md
@@ -69,9 +69,61 @@ uv run python
benchmark/encoding_benchmarks/qdp_pipeline/iris_amplitude.py \
--optimizer nesterov --lr 0.01 --layers 6 --trials 3 --iters 80 --early-stop 0
```
+## MNIST amplitude baseline (pure PennyLane)
+
+Pipeline: 2-class MNIST (default: digits 3 vs 6) → PCA (784 → 2^qubits) → L2
normalize → `AmplitudeEmbedding` → variational classifier.
+
+```bash
+uv run python
benchmark/encoding_benchmarks/pennylane_baseline/mnist_amplitude.py
+```
+
+Common flags (only the key ones):
+
+- `--qubits`: number of qubits; PCA reduces to 2^qubits features (default: 4 →
16-D)
+- `--digits`: two digits for binary classification (default: `3,6`)
+- `--n-samples`: max samples per class (default: 500)
+- `--iters`: optimizer steps (default: 2000)
+- `--layers`: number of variational layers (default: 10)
+- `--lr`: learning rate (default: 0.05)
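+- Optimizer: fixed to SGD with Nesterov momentum (0.9); this script has no `--optimizer` flag
+- `--early-stop`: stop a run once test accuracy reaches this value (default: 0.95; `0` disables)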
+- `--trials`: number of restarts; best test accuracy reported (default: 10)
+
+Example (quick test):
+
+```bash
+uv run python
benchmark/encoding_benchmarks/pennylane_baseline/mnist_amplitude.py \
+ --digits "3,6" --n-samples 100 --trials 3 --iters 500 --early-stop 0
+```
+
+## MNIST amplitude (QDP pipeline)
+
+Pipeline is identical to the baseline except for encoding:
+PCA-reduced vectors → QDP `QdpEngine.encode` (amplitude) →
`StatePrep(state_vector)` → same variational classifier.
+
+```bash
+uv run python benchmark/encoding_benchmarks/qdp_pipeline/mnist_amplitude.py
+```
+
+The CLI mirrors the baseline, plus:
+
+- **QDP-specific flags**
+ - `--device-id`: QDP device id (default: 0)
+ - `--data-dir`: accepted for CLI compatibility but currently unused; encoding runs in memory and no `.npy` files are written
+
+Example (same settings as the baseline example, but with QDP encoding):
+
+```bash
+uv run python benchmark/encoding_benchmarks/qdp_pipeline/mnist_amplitude.py \
+ --digits "3,6" --n-samples 100 --trials 3 --iters 500 --early-stop 0
+```
+
+## Full help
+
To see the full list of options and defaults, append `--help`:
```bash
uv run python
benchmark/encoding_benchmarks/pennylane_baseline/iris_amplitude.py --help
+uv run python
benchmark/encoding_benchmarks/pennylane_baseline/mnist_amplitude.py --help
uv run python benchmark/encoding_benchmarks/qdp_pipeline/iris_amplitude.py
--help
+uv run python benchmark/encoding_benchmarks/qdp_pipeline/mnist_amplitude.py
--help
```
diff --git
a/qdp/qdp-python/benchmark/encoding_benchmarks/pennylane_baseline/mnist_amplitude.py
b/qdp/qdp-python/benchmark/encoding_benchmarks/pennylane_baseline/mnist_amplitude.py
new file mode 100644
index 000000000..a15ec8662
--- /dev/null
+++
b/qdp/qdp-python/benchmark/encoding_benchmarks/pennylane_baseline/mnist_amplitude.py
@@ -0,0 +1,362 @@
+#!/usr/bin/env python3
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+PennyLane baseline: MNIST (2-class), amplitude encoding, variational
classifier.
+
+Data source: sklearn fetch_openml('mnist_784'), binary subset (default: digits
3 vs 6).
+Pipeline: PCA (784 -> 2^num_qubits) -> L2 norm -> AmplitudeEmbedding -> Rot
layers + CNOT ring
+-> expval(PauliZ(0)) + bias; square loss; SGD+Nesterov via torch.
+
+Training: lightning.gpu (adjoint, torch).
+"""
+
+from __future__ import annotations
+
+# --- Imports ---
+import argparse
+import time
+from typing import Any
+
+import numpy as np
+import torch
+
+try:
+ import pennylane as qml
+except ImportError as e:
+ raise SystemExit(
+ "PennyLane is required. Install with: uv sync --group benchmark"
+ ) from e
+
+try:
+ from sklearn.datasets import fetch_openml
+ from sklearn.decomposition import PCA
+ from sklearn.preprocessing import StandardScaler
+except ImportError as e:
+ raise SystemExit(
+ "scikit-learn is required. Install with: uv sync --group benchmark"
+ ) from e
+
+try:
+ from tqdm import trange
+except ImportError:
+ trange = None
+
+
+DEFAULT_NUM_QUBITS = 4
+DEFAULT_DIGITS = (3, 6)
+DEFAULT_N_SAMPLES = 500
+
+
+# --- Circuit: variational layer (Rot + CNOT ring) ---
+def layer(layer_weights, wires):
+    """Apply one variational layer: a Rot gate per wire, then a ring of CNOTs.
+
+    Args:
+        layer_weights: Indexable of per-wire parameters; ``layer_weights[i]`` is
+            unpacked into ``qml.Rot`` for ``wires[i]``.
+        wires: Sequence of wire labels the layer acts on.
+
+    The entangling ring connects each wire to its successor and wraps the last
+    wire back to the first. It is skipped when fewer than two wires are given,
+    because the ring would then be a CNOT whose control and target coincide.
+    """
+    for i, w in enumerate(wires):
+        qml.Rot(*layer_weights[i], wires=w)
+    n = len(wires)
+    if n < 2:
+        # Nothing to entangle; CNOT(wires=[w, w]) is not a valid gate.
+        return
+    for i in range(n):
+        qml.CNOT(wires=[wires[i], wires[(i + 1) % n]])
+
+
+# --- Data: MNIST binary subset -> PCA -> L2 norm ---
+def load_mnist_binary(
+    digits: tuple[int, int] = DEFAULT_DIGITS,
+    n_samples: int = DEFAULT_N_SAMPLES,
+    num_qubits: int = DEFAULT_NUM_QUBITS,
+    seed: int = 42,
+) -> tuple[np.ndarray, np.ndarray]:
+    """Load a two-digit MNIST subset as unit-norm, PCA-reduced feature vectors.
+
+    Fetches ``mnist_784`` through ``sklearn.datasets.fetch_openml``, keeps the
+    two requested digit classes, draws up to ``n_samples`` images per class
+    without replacement, standardizes, projects with PCA and L2-normalizes
+    every row.
+
+    Args:
+        digits: The two distinct digit classes to keep (each in 0..9).
+        n_samples: Maximum number of images drawn per class (must be >= 1).
+        num_qubits: Target qubit count; rows have ``2**num_qubits`` entries.
+        seed: Seed for the subsampling generator and PCA's ``random_state``.
+
+    Returns:
+        ``(features, Y)`` where ``features`` has shape ``(n, 2**num_qubits)``
+        and ``Y`` holds -1.0 for ``digits[0]`` and +1.0 for ``digits[1]``.
+
+    Raises:
+        ValueError: If ``digits`` is not a pair of distinct digits in 0..9, or
+            if ``n_samples`` is smaller than 1.
+    """
+    first, second = digits
+    if first == second or not all(0 <= d <= 9 for d in (first, second)):
+        raise ValueError(
+            f"digits must be two distinct MNIST classes in 0..9, got {digits!r}"
+        )
+    if n_samples < 1:
+        raise ValueError(f"n_samples must be >= 1, got {n_samples}")
+
+    state_dim = 2**num_qubits
+    rng = np.random.default_rng(seed)
+
+    X_raw, y_raw = fetch_openml(
+        "mnist_784", version=1, return_X_y=True, as_frame=False, parser="auto"
+    )
+    y = y_raw.astype(int)
+    mask = (y == first) | (y == second)
+    X = np.asarray(X_raw[mask], dtype=np.float64)
+    y = y[mask]
+
+    # Balanced subsample: at most n_samples per class, without replacement.
+    # The order of the generator calls fixes the selection for a given seed.
+    idx0 = np.where(y == first)[0]
+    idx1 = np.where(y == second)[0]
+    n0 = min(n_samples, len(idx0))
+    n1 = min(n_samples, len(idx1))
+    sel = np.concatenate(
+        [
+            rng.choice(idx0, size=n0, replace=False),
+            rng.choice(idx1, size=n1, replace=False),
+        ]
+    )
+    rng.shuffle(sel)
+    X = X[sel]
+    y = y[sel]
+
+    # StandardScaler -> PCA -> L2 norm
+    X = StandardScaler().fit_transform(X)
+    n_components = min(state_dim, X.shape[1], X.shape[0])
+    X = PCA(n_components=n_components, random_state=seed).fit_transform(X)
+    if n_components < state_dim:
+        # PCA cannot return more components than samples/features; zero-pad the
+        # remaining amplitudes so every row still has 2**num_qubits entries.
+        X = np.pad(X, ((0, 0), (0, state_dim - n_components)), constant_values=0.0)
+    # The small offset keeps an all-zero row from dividing by zero.
+    norm = np.sqrt(np.sum(X**2, axis=-1)) + 1e-12
+    X_norm = X / norm[:, np.newaxis]
+
+    # Labels: first digit -> -1, second digit -> +1
+    Y = np.where(y == first, -1.0, 1.0)
+    return X_norm, Y
+
+
+# --- Training: GPU (lightning.gpu + torch + adjoint) ---
+def run_training(
+    features: np.ndarray,
+    Y: np.ndarray,
+    *,
+    num_qubits: int,
+    num_layers: int,
+    iterations: int,
+    batch_size: int,
+    lr: float,
+    seed: int,
+    test_size: float = 0.25,
+    early_stop_target: float | None = 0.95,
+    device_id: int = 0,
+) -> dict[str, Any]:
+    """Train the AmplitudeEmbedding + Rot-layer classifier and report timings.
+
+    The model is ``expval(PauliZ(0)) + bias`` trained with a mean squared error
+    loss on mini-batches drawn with replacement, using SGD with Nesterov
+    momentum. The circuit runs on ``lightning.gpu`` through the torch interface
+    with adjoint differentiation.
+
+    Args:
+        features: Rows to embed; indexed by sample along the first axis.
+        Y: Labels aligned with ``features`` (the loader produces -1.0 / +1.0).
+        num_qubits: Number of wires in the circuit.
+        num_layers: Number of variational layers.
+        iterations: Maximum number of optimizer steps.
+        batch_size: Number of samples drawn per step (must be >= 1).
+        lr: Learning rate passed to ``torch.optim.SGD``.
+        seed: Seeds the split, the mini-batch draws and the weight init.
+        test_size: Fraction of samples held out for testing.
+        early_stop_target: If not ``None``, training stops at the first
+            multiple of 100 steps where test accuracy reaches this value.
+            The check uses the held-out split, so the reported test accuracy
+            is not independent of the stopping rule.
+        device_id: Index of the CUDA device that holds the torch tensors. The
+            ``lightning.gpu`` device is created without this index.
+
+    Returns:
+        Dict with compile/train wall-clock times, train/test accuracy, split
+        sizes, ``"epochs"`` (the number of optimizer steps actually taken),
+        throughput in samples per second and the device label.
+
+    Raises:
+        ValueError: If the split would leave the train or test set empty, or
+            if ``batch_size`` is smaller than 1.
+    """
+    # Train/test split (seed-driven). The same generator later draws the
+    # mini-batches, so the order of the generator calls must not change.
+    n = len(Y)
+    rng = np.random.default_rng(seed)
+    idx = rng.permutation(n)
+    n_train = int(n * (1 - test_size))
+    if not 0 < n_train < n:
+        raise ValueError(
+            f"test_size={test_size} leaves an empty train or test split for n={n}"
+        )
+    if batch_size < 1:
+        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
+    X_train = features[idx[:n_train]]
+    X_test = features[idx[n_train:]]
+    Y_train = Y[idx[:n_train]]
+    Y_test = Y[idx[n_train:]]
+
+    wires = tuple(range(num_qubits))
+    device = torch.device(f"cuda:{device_id}")
+    dtype = torch.float64
+
+    feats_train = torch.tensor(X_train, dtype=dtype, device=device)
+    feats_test = torch.tensor(X_test, dtype=dtype, device=device)
+    Y_train_t = torch.tensor(Y_train, dtype=dtype, device=device)
+    Y_test_t = torch.tensor(Y_test, dtype=dtype, device=device)
+
+    dev_qml = qml.device("lightning.gpu", wires=num_qubits)
+
+    @qml.qnode(dev_qml, interface="torch", diff_method="adjoint")
+    def circuit(weights, x):
+        # normalize=False: rows are expected to be L2-normalized already.
+        qml.AmplitudeEmbedding(features=x, wires=wires, normalize=False)
+        for lw in weights:
+            layer(lw, wires=wires)
+        return qml.expval(qml.PauliZ(0))
+
+    def model(weights, bias, x):
+        return circuit(weights, x) + bias
+
+    def cost(weights, bias, X_batch, Y_batch):
+        preds = model(weights, bias, X_batch)
+        return torch.mean((Y_batch - preds) ** 2)
+
+    torch.manual_seed(seed)
+    weights = (
+        (0.01 * torch.randn(num_layers, num_qubits, 3, device=device, dtype=dtype))
+        .detach()
+        .requires_grad_(True)
+    )
+    bias = torch.zeros(1, device=device, dtype=dtype).squeeze().requires_grad_(True)
+    opt = torch.optim.SGD([weights, bias], lr=lr, momentum=0.9, nesterov=True)
+
+    def accuracy(X_eval, Y_eval):
+        # A prediction is correct when sign(model output) equals the label.
+        pred = torch.sign(model(weights, bias, X_eval)).flatten()
+        return (pred - Y_eval).abs().lt(1e-5).float().mean().item()
+
+    # Compile (first run): timed separately so one-off setup cost does not
+    # count towards the training time.
+    t0 = time.perf_counter()
+    _ = circuit(weights, feats_train[0:1])
+    _ = cost(weights, bias, feats_train[:1], Y_train_t[:1])
+    compile_sec = time.perf_counter() - t0
+
+    # Optimize
+    t0 = time.perf_counter()
+    steps_done = 0
+    step_iter = (
+        trange(iterations, desc=" Training (GPU)", leave=False)
+        if trange
+        else range(iterations)
+    )
+    for step in step_iter:
+        opt.zero_grad()
+        batch_idx = rng.integers(0, n_train, size=(batch_size,))
+        fb = feats_train[batch_idx]
+        yb = Y_train_t[batch_idx]
+        loss = cost(weights, bias, fb, yb)
+        loss.backward()
+        opt.step()
+        steps_done += 1
+        if early_stop_target is not None and (step + 1) % 100 == 0:
+            with torch.no_grad():
+                test_acc_now = accuracy(feats_test, Y_test_t)
+            if test_acc_now >= early_stop_target:
+                break
+    train_sec = time.perf_counter() - t0
+
+    with torch.no_grad():
+        train_acc = accuracy(feats_train, Y_train_t)
+        test_acc = accuracy(feats_test, Y_test_t)
+
+    return {
+        "compile_time_sec": compile_sec,
+        "train_time_sec": train_sec,
+        "train_accuracy": float(train_acc),
+        "test_accuracy": float(test_acc),
+        "n_train": n_train,
+        "n_test": len(Y_test),
+        "epochs": steps_done,
+        "samples_per_sec": (steps_done * batch_size) / train_sec
+        if train_sec > 0
+        else 0.0,
+        "qml_device": "cuda",
+    }
+
+
+def main() -> None:
+    """Parse CLI options, load the MNIST subset, run the trials and print results.
+
+    Each trial calls ``run_training`` with ``seed + t``. When more than one
+    trial is run, a summary with the best, median, minimum and maximum test
+    accuracy is printed.
+    """
+    parser = argparse.ArgumentParser(
+        description="PennyLane MNIST amplitude encoding baseline (2-class)"
+    )
+    parser.add_argument(
+        "--qubits",
+        type=int,
+        default=DEFAULT_NUM_QUBITS,
+        help="Number of qubits; PCA reduces to 2^qubits features "
+        f"(default: {DEFAULT_NUM_QUBITS})",
+    )
+    parser.add_argument(
+        "--digits",
+        type=str,
+        default="3,6",
+        help="Two digits for binary classification, comma-separated (default: '3,6')",
+    )
+    parser.add_argument(
+        "--n-samples",
+        type=int,
+        default=DEFAULT_N_SAMPLES,
+        help=f"Max samples per class (default: {DEFAULT_N_SAMPLES}; "
+        "total <= 2*n_samples)",
+    )
+    parser.add_argument(
+        "--iters",
+        type=int,
+        default=2000,
+        help="Max optimizer steps per run (default: 2000)",
+    )
+    parser.add_argument(
+        "--batch-size", type=int, default=10, help="Batch size (default: 10)"
+    )
+    parser.add_argument(
+        "--layers", type=int, default=10, help="Variational layers (default: 10)"
+    )
+    parser.add_argument(
+        "--lr", type=float, default=0.05, help="Learning rate (default: 0.05)"
+    )
+    parser.add_argument(
+        "--test-size",
+        type=float,
+        default=0.25,
+        help="Test fraction (default: 0.25)",
+    )
+    parser.add_argument("--seed", type=int, default=0, help="Random seed (default: 0)")
+    parser.add_argument(
+        "--trials",
+        type=int,
+        default=10,
+        help="Number of restarts; best test acc reported (default: 10)",
+    )
+    parser.add_argument(
+        "--early-stop",
+        type=float,
+        default=0.95,
+        help="Stop run when test acc >= this (default: 0.95; 0 = off)",
+    )
+    parser.add_argument(
+        "--device-id", type=int, default=0, help="GPU device id (default: 0)"
+    )
+    args = parser.parse_args()
+
+    # Reject malformed --digits with a usage error instead of a bare traceback.
+    try:
+        d0, d1 = (int(d) for d in args.digits.split(","))
+    except ValueError:
+        parser.error(
+            f"--digits expects two comma-separated integers, got {args.digits!r}"
+        )
+    if d0 == d1 or not all(0 <= d <= 9 for d in (d0, d1)):
+        parser.error(
+            f"--digits must be two distinct digits in 0..9, got {args.digits!r}"
+        )
+    digits = (d0, d1)
+    features, Y = load_mnist_binary(
+        digits=digits,
+        n_samples=args.n_samples,
+        num_qubits=args.qubits,
+        seed=args.seed,
+    )
+    n = len(Y)
+    state_dim = 2**args.qubits
+    print("MNIST amplitude baseline (PennyLane) — 2-class variational classifier")
+    print(
+        f" Data: fetch_openml('mnist_784'), digits {d0} vs {d1}, "
+        f"PCA {state_dim}-D, L2 norm (n={n})"
+    )
+    print(
+        f" Qubits: {args.qubits}, iters: {args.iters}, batch_size: {args.batch_size}, "
+        f"layers: {args.layers}, lr: {args.lr}"
+    )
+
+    results: list[dict[str, Any]] = []
+    for t in range(args.trials):
+        r = run_training(
+            features,
+            Y,
+            num_qubits=args.qubits,
+            num_layers=args.layers,
+            iterations=args.iters,
+            batch_size=args.batch_size,
+            lr=args.lr,
+            seed=args.seed + t,
+            test_size=args.test_size,
+            early_stop_target=args.early_stop if args.early_stop > 0 else None,
+            device_id=args.device_id,
+        )
+        results.append(r)
+        print(f"\n Trial {t + 1}:")
+        print(f" QML device: {r.get('qml_device', 'cpu')}")
+        print(f" Compile: {r['compile_time_sec']:.4f} s")
+        print(f" Train: {r['train_time_sec']:.4f} s")
+        print(f" Train acc: {r['train_accuracy']:.4f} (n={r['n_train']})")
+        print(f" Test acc: {r['test_accuracy']:.4f} (n={r['n_test']})")
+        print(f" Throughput: {r['samples_per_sec']:.1f} samples/s")
+
+    if args.trials > 1:
+        test_accs = sorted(r["test_accuracy"] for r in results)
+        best = test_accs[-1]
+        # np.median averages the two middle values for an even number of
+        # trials; indexing at trials // 2 would report the upper-middle value.
+        median = float(np.median(test_accs))
+        print(
+            f"\n Best test accuracy: {best:.4f} (median: {median:.4f}, "
+            f"min: {test_accs[0]:.4f}, max: {test_accs[-1]:.4f})"
+        )
+        if best >= 0.95:
+            print(" → Target ≥0.95 achieved.")
+
+if __name__ == "__main__":
+ main()
diff --git
a/qdp/qdp-python/benchmark/encoding_benchmarks/qdp_pipeline/mnist_amplitude.py
b/qdp/qdp-python/benchmark/encoding_benchmarks/qdp_pipeline/mnist_amplitude.py
new file mode 100644
index 000000000..21266e702
--- /dev/null
+++
b/qdp/qdp-python/benchmark/encoding_benchmarks/qdp_pipeline/mnist_amplitude.py
@@ -0,0 +1,423 @@
+#!/usr/bin/env python3
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+QDP pipeline: MNIST (2-class), same data and training as baseline; only
encoding differs.
+
+Data source: sklearn fetch_openml('mnist_784'), binary subset (default: digits
3 vs 6).
+Pipeline: PCA (784 -> 2^num_qubits) -> L2 norm -> QDP (QdpEngine.encode +
amplitude) -> StatePrep(encoded)
+-> Rot layers + CNOT ring -> expval(PauliZ(0)) + bias; square loss;
SGD+Nesterov via torch.
+
+Training: lightning.gpu (adjoint, torch).
+"""
+
+from __future__ import annotations
+
+# --- Imports ---
+import argparse
+import time
+from typing import Any
+
+import numpy as np
+import torch
+
+try:
+ import pennylane as qml
+except ImportError as e:
+ raise SystemExit(
+ "PennyLane is required. Install with: uv sync --group benchmark"
+ ) from e
+
+try:
+ from sklearn.datasets import fetch_openml
+ from sklearn.decomposition import PCA
+ from sklearn.preprocessing import StandardScaler
+except ImportError as e:
+ raise SystemExit(
+ "scikit-learn is required. Install with: uv sync --group benchmark"
+ ) from e
+
+from qumat_qdp import QdpEngine
+
+try:
+ from tqdm import trange
+except ImportError:
+ trange = None
+
+DEFAULT_NUM_QUBITS = 4
+DEFAULT_DIGITS = (3, 6)
+DEFAULT_N_SAMPLES = 500
+
+
+# --- Circuit: variational layer (Rot + CNOT ring) ---
+def layer(layer_weights, wires):
+    """Apply one variational layer: a Rot gate per wire, then a ring of CNOTs.
+
+    Args:
+        layer_weights: Indexable of per-wire parameters; ``layer_weights[i]`` is
+            unpacked into ``qml.Rot`` for ``wires[i]``.
+        wires: Sequence of wire labels the layer acts on.
+
+    The entangling ring connects each wire to its successor and wraps the last
+    wire back to the first. It is skipped when fewer than two wires are given,
+    because the ring would then be a CNOT whose control and target coincide.
+    """
+    for i, w in enumerate(wires):
+        qml.Rot(*layer_weights[i], wires=w)
+    n = len(wires)
+    if n < 2:
+        # Nothing to entangle; CNOT(wires=[w, w]) is not a valid gate.
+        return
+    for i in range(n):
+        qml.CNOT(wires=[wires[i], wires[(i + 1) % n]])
+
+
+# --- Data: MNIST binary subset -> PCA -> L2 norm (returns raw vectors for
QDP) ---
+def load_mnist_binary_nd(
+    digits: tuple[int, int] = DEFAULT_DIGITS,
+    n_samples: int = DEFAULT_N_SAMPLES,
+    num_qubits: int = DEFAULT_NUM_QUBITS,
+    seed: int = 42,
+) -> tuple[np.ndarray, np.ndarray]:
+    """Load a two-digit MNIST subset as unit-norm, PCA-reduced vectors for QDP.
+
+    Fetches ``mnist_784`` through ``sklearn.datasets.fetch_openml``, keeps the
+    two requested digit classes, draws up to ``n_samples`` images per class
+    without replacement, standardizes, projects with PCA and L2-normalizes
+    every row.
+
+    Args:
+        digits: The two distinct digit classes to keep (each in 0..9).
+        n_samples: Maximum number of images drawn per class (must be >= 1).
+        num_qubits: Target qubit count; rows have ``2**num_qubits`` entries.
+        seed: Seed for the subsampling generator and PCA's ``random_state``.
+
+    Returns:
+        ``(X_norm, Y)`` where ``X_norm`` has shape ``(n, 2**num_qubits)`` and
+        ``Y`` holds -1.0 for ``digits[0]`` and +1.0 for ``digits[1]``.
+
+    Raises:
+        ValueError: If ``digits`` is not a pair of distinct digits in 0..9, or
+            if ``n_samples`` is smaller than 1.
+    """
+    first, second = digits
+    if first == second or not all(0 <= d <= 9 for d in (first, second)):
+        raise ValueError(
+            f"digits must be two distinct MNIST classes in 0..9, got {digits!r}"
+        )
+    if n_samples < 1:
+        raise ValueError(f"n_samples must be >= 1, got {n_samples}")
+
+    state_dim = 2**num_qubits
+    rng = np.random.default_rng(seed)
+
+    X_raw, y_raw = fetch_openml(
+        "mnist_784", version=1, return_X_y=True, as_frame=False, parser="auto"
+    )
+    y = y_raw.astype(int)
+    mask = (y == first) | (y == second)
+    X = np.asarray(X_raw[mask], dtype=np.float64)
+    y = y[mask]
+
+    # Balanced subsample: at most n_samples per class, without replacement.
+    # The order of the generator calls fixes the selection for a given seed.
+    idx0 = np.where(y == first)[0]
+    idx1 = np.where(y == second)[0]
+    n0 = min(n_samples, len(idx0))
+    n1 = min(n_samples, len(idx1))
+    sel = np.concatenate(
+        [
+            rng.choice(idx0, size=n0, replace=False),
+            rng.choice(idx1, size=n1, replace=False),
+        ]
+    )
+    rng.shuffle(sel)
+    X = X[sel]
+    y = y[sel]
+
+    # StandardScaler -> PCA -> L2 norm
+    X = StandardScaler().fit_transform(X)
+    n_components = min(state_dim, X.shape[1], X.shape[0])
+    X = PCA(n_components=n_components, random_state=seed).fit_transform(X)
+    if n_components < state_dim:
+        # PCA cannot return more components than samples/features; zero-pad the
+        # remaining amplitudes so every row still has 2**num_qubits entries.
+        X = np.pad(X, ((0, 0), (0, state_dim - n_components)), constant_values=0.0)
+    # The small offset keeps an all-zero row from dividing by zero.
+    norm = np.sqrt(np.sum(X**2, axis=-1)) + 1e-12
+    X_norm = X / norm[:, np.newaxis]
+
+    # Labels: first digit -> -1, second digit -> +1
+    Y = np.where(y == first, -1.0, 1.0)
+    return X_norm, Y
+
+
+# --- Encoding: QDP (QdpEngine.encode + amplitude) ---
+def encode_via_qdp(
+    X_norm: np.ndarray,
+    num_qubits: int,
+    batch_size: int = 10,  # kept for CLI symmetry; not used here
+    device_id: int = 0,
+    data_dir: str | None = None,
+    filename: str = "mnist_nd.npy",
+) -> torch.Tensor:
+    """Amplitude-encode PCA-reduced rows with QDP and return a torch tensor.
+
+    The rows are passed to ``QdpEngine.encode`` directly from memory; this
+    function neither writes nor reads ``.npy`` files, so ``batch_size``,
+    ``data_dir`` and ``filename`` are accepted but ignored.
+
+    Args:
+        X_norm: 2-D array with one sample per row and ``2**num_qubits`` columns.
+        num_qubits: Number of qubits of the encoded states.
+        batch_size: Ignored.
+        device_id: Device index forwarded to ``QdpEngine``.
+        data_dir: Ignored.
+        filename: Ignored.
+
+    Returns:
+        The encoder output imported through DLPack, sliced to the first
+        ``len(X_norm)`` rows. Per the original author's note it stays on the
+        selected CUDA device and can be fed to ``qml.StatePrep``.
+
+    Raises:
+        ValueError: If the column count of ``X_norm`` is not ``2**num_qubits``.
+
+    NOTE(review): the engine is created with ``precision="float32"`` although
+    the input is cast to float64 -- confirm this is the intended precision.
+    """
+    n_rows, n_features = X_norm.shape
+    expected_features = 2**num_qubits
+    if n_features != expected_features:
+        raise ValueError(
+            f"X_norm must have {expected_features} features for {num_qubits} qubits, "
+            f"got {n_features}"
+        )
+
+    qdp_engine = QdpEngine(device_id=device_id, precision="float32")
+    samples = X_norm.astype(np.float64)
+    quantum_tensor = qdp_engine.encode(
+        samples,
+        num_qubits=num_qubits,
+        encoding_method="amplitude",
+    )
+    # Import without a copy, then keep exactly one row per input sample.
+    return torch.from_dlpack(quantum_tensor)[:n_rows]
+
+
+# --- Training: GPU (lightning.gpu + torch + adjoint) ---
+def run_training(
+    encoded_train: torch.Tensor,
+    encoded_test: torch.Tensor,
+    Y_train: np.ndarray,
+    Y_test: np.ndarray,
+    *,
+    num_qubits: int,
+    num_layers: int,
+    iterations: int,
+    batch_size: int,
+    lr: float,
+    seed: int,
+    early_stop_target: float | None = None,
+) -> dict[str, Any]:
+    """Train the StatePrep + Rot-layer classifier on pre-encoded states.
+
+    The model is ``expval(PauliZ(0)) + bias`` trained with a mean squared error
+    loss on mini-batches drawn with replacement, using SGD with Nesterov
+    momentum. The circuit runs on ``lightning.gpu`` through the torch interface
+    with adjoint differentiation. Labels and weights are created on the device
+    of ``encoded_train``.
+
+    Args:
+        encoded_train: Encoded training states, one per row.
+        encoded_test: Encoded test states, one per row.
+        Y_train: Labels aligned with ``encoded_train``.
+        Y_test: Labels aligned with ``encoded_test``.
+        num_qubits: Number of wires in the circuit.
+        num_layers: Number of variational layers.
+        iterations: Maximum number of optimizer steps.
+        batch_size: Number of samples drawn per step (must be >= 1).
+        lr: Learning rate passed to ``torch.optim.SGD``.
+        seed: Seeds the mini-batch draws and the weight init.
+        early_stop_target: If not ``None``, training stops at the first
+            multiple of 100 steps where test accuracy reaches this value.
+            The check uses the test set, so the reported test accuracy is not
+            independent of the stopping rule.
+
+    Returns:
+        Dict with compile/train wall-clock times, train/test accuracy, split
+        sizes, ``"epochs"`` (the number of optimizer steps actually taken),
+        throughput in samples per second and the device label.
+
+    Raises:
+        ValueError: If either label array is empty or ``batch_size`` is
+            smaller than 1.
+    """
+    n_train = len(Y_train)
+    if n_train < 1 or len(Y_test) < 1:
+        raise ValueError(
+            f"need non-empty train and test sets, got {n_train} and {len(Y_test)}"
+        )
+    if batch_size < 1:
+        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
+    rng = np.random.default_rng(seed)
+
+    wires = tuple(range(num_qubits))
+    device = encoded_train.device
+    # Encoded data may be complex (from QDP); use the matching real dtype for
+    # weights and labels. Real float64 input is double precision as well, so
+    # it must not fall through to float32.
+    double_precision = encoded_train.dtype in (torch.complex128, torch.float64)
+    real_dtype = torch.float64 if double_precision else torch.float32
+    Y_train_t = torch.tensor(Y_train, dtype=real_dtype, device=device)
+    Y_test_t = torch.tensor(Y_test, dtype=real_dtype, device=device)
+
+    dev_qml = qml.device("lightning.gpu", wires=num_qubits)
+
+    @qml.qnode(dev_qml, interface="torch", diff_method="adjoint")
+    def circuit(weights, state_vector):
+        qml.StatePrep(state_vector, wires=wires)
+        for lw in weights:
+            layer(lw, wires=wires)
+        return qml.expval(qml.PauliZ(0))
+
+    def model(weights, bias, state_batch):
+        return circuit(weights, state_batch) + bias
+
+    def cost(weights, bias, X_batch, Y_batch):
+        preds = model(weights, bias, X_batch)
+        return torch.mean((Y_batch - preds) ** 2)
+
+    torch.manual_seed(seed)
+    weights = (
+        (
+            0.01
+            * torch.randn(num_layers, num_qubits, 3, device=device, dtype=real_dtype)
+        )
+        .detach()
+        .requires_grad_(True)
+    )
+    bias = (
+        torch.zeros(1, device=device, dtype=real_dtype).squeeze().requires_grad_(True)
+    )
+    opt = torch.optim.SGD([weights, bias], lr=lr, momentum=0.9, nesterov=True)
+
+    def accuracy(X_eval, Y_eval):
+        # A prediction is correct when sign(model output) equals the label.
+        pred = torch.sign(model(weights, bias, X_eval)).flatten()
+        return (pred - Y_eval).abs().lt(1e-5).float().mean().item()
+
+    # Compile (first run): timed separately so one-off setup cost does not
+    # count towards the training time.
+    t0 = time.perf_counter()
+    _ = circuit(weights, encoded_train[0:1])
+    _ = cost(weights, bias, encoded_train[:1], Y_train_t[:1])
+    compile_sec = time.perf_counter() - t0
+
+    # Optimize
+    t0 = time.perf_counter()
+    steps_done = 0
+    step_iter = (
+        trange(iterations, desc=" Training (GPU)", leave=False)
+        if trange
+        else range(iterations)
+    )
+    for step in step_iter:
+        opt.zero_grad()
+        batch_idx = rng.integers(0, n_train, size=(batch_size,))
+        fb = encoded_train[batch_idx]
+        yb = Y_train_t[batch_idx]
+        loss = cost(weights, bias, fb, yb)
+        loss.backward()
+        opt.step()
+        steps_done += 1
+        if early_stop_target is not None and (step + 1) % 100 == 0:
+            with torch.no_grad():
+                test_acc_now = accuracy(encoded_test, Y_test_t)
+            if test_acc_now >= early_stop_target:
+                break
+    train_sec = time.perf_counter() - t0
+
+    with torch.no_grad():
+        train_acc = accuracy(encoded_train, Y_train_t)
+        test_acc = accuracy(encoded_test, Y_test_t)
+
+    return {
+        "compile_time_sec": compile_sec,
+        "train_time_sec": train_sec,
+        "train_accuracy": float(train_acc),
+        "test_accuracy": float(test_acc),
+        "n_train": n_train,
+        "n_test": len(Y_test),
+        "epochs": steps_done,
+        "samples_per_sec": (steps_done * batch_size) / train_sec
+        if train_sec > 0
+        else 0.0,
+        "qml_device": "cuda",
+    }
+
+
+def main() -> None:
+    """Parse CLI options, encode the MNIST subset with QDP, train and report.
+
+    The train/test split is drawn once from ``--seed`` and the encoded tensors
+    are shared by every trial; only the weight initialization and the
+    mini-batch order vary, through ``seed + t``. When more than one trial is
+    run, a summary with the best, median, minimum and maximum test accuracy is
+    printed.
+    """
+    parser = argparse.ArgumentParser(
+        description="QDP MNIST amplitude encoding pipeline "
+        "(2-class, same training as baseline)"
+    )
+    parser.add_argument(
+        "--qubits",
+        type=int,
+        default=DEFAULT_NUM_QUBITS,
+        help="Number of qubits; PCA reduces to 2^qubits features "
+        f"(default: {DEFAULT_NUM_QUBITS})",
+    )
+    parser.add_argument(
+        "--digits",
+        type=str,
+        default="3,6",
+        help="Two digits for binary classification, comma-separated (default: '3,6')",
+    )
+    parser.add_argument(
+        "--n-samples",
+        type=int,
+        default=DEFAULT_N_SAMPLES,
+        help=f"Max samples per class (default: {DEFAULT_N_SAMPLES}; "
+        "total <= 2*n_samples)",
+    )
+    parser.add_argument(
+        "--iters",
+        type=int,
+        default=2000,
+        help="Max optimizer steps per run (default: 2000)",
+    )
+    parser.add_argument(
+        "--batch-size", type=int, default=10, help="Batch size (default: 10)"
+    )
+    parser.add_argument(
+        "--layers", type=int, default=10, help="Variational layers (default: 10)"
+    )
+    parser.add_argument(
+        "--lr", type=float, default=0.05, help="Learning rate (default: 0.05)"
+    )
+    parser.add_argument(
+        "--test-size",
+        type=float,
+        default=0.25,
+        help="Test fraction (default: 0.25)",
+    )
+    parser.add_argument("--seed", type=int, default=0, help="Random seed (default: 0)")
+    parser.add_argument(
+        "--trials",
+        type=int,
+        default=10,
+        help="Number of restarts; best test acc reported (default: 10)",
+    )
+    parser.add_argument(
+        "--early-stop",
+        type=float,
+        default=0.95,
+        help="Stop run when test acc >= this (default: 0.95; 0 = off)",
+    )
+    parser.add_argument(
+        "--device-id", type=int, default=0, help="QDP device (default: 0)"
+    )
+    parser.add_argument(
+        "--data-dir",
+        type=str,
+        default=None,
+        # The encoder works in memory and never touches this directory.
+        help="Unused; kept for CLI compatibility (encoding is done in memory)",
+    )
+    args = parser.parse_args()
+
+    # Reject malformed --digits with a usage error instead of a bare traceback.
+    try:
+        d0, d1 = (int(d) for d in args.digits.split(","))
+    except ValueError:
+        parser.error(
+            f"--digits expects two comma-separated integers, got {args.digits!r}"
+        )
+    if d0 == d1 or not all(0 <= d <= 9 for d in (d0, d1)):
+        parser.error(
+            f"--digits must be two distinct digits in 0..9, got {args.digits!r}"
+        )
+    digits = (d0, d1)
+    X_norm, Y = load_mnist_binary_nd(
+        digits=digits,
+        n_samples=args.n_samples,
+        num_qubits=args.qubits,
+        seed=args.seed,
+    )
+    n = len(Y)
+    state_dim = 2**args.qubits
+    rng = np.random.default_rng(args.seed)
+    idx = rng.permutation(n)
+    n_train = int(n * (1 - args.test_size))
+    if not 0 < n_train < n:
+        parser.error(
+            f"--test-size {args.test_size} leaves an empty train or test split "
+            f"for n={n}"
+        )
+    train_idx, test_idx = idx[:n_train], idx[n_train:]
+    X_train = X_norm[train_idx]
+    X_test = X_norm[test_idx]
+    Y_train = Y[train_idx]
+    Y_test = Y[test_idx]
+
+    # QDP encoding
+    t0 = time.perf_counter()
+    encoded_train = encode_via_qdp(
+        X_train,
+        num_qubits=args.qubits,
+        batch_size=args.batch_size,
+        device_id=args.device_id,
+        data_dir=args.data_dir,
+        filename="mnist_nd_train.npy",
+    )
+    encoded_test = encode_via_qdp(
+        X_test,
+        num_qubits=args.qubits,
+        batch_size=args.batch_size,
+        device_id=args.device_id,
+        data_dir=args.data_dir,
+        filename="mnist_nd_test.npy",
+    )
+    # Wait for any GPU work that may still be queued, so the timer covers the
+    # whole encoding and not just its launch.
+    torch.cuda.synchronize(args.device_id)
+    encode_sec = time.perf_counter() - t0
+
+    print("MNIST amplitude (QDP encoding) — 2-class variational classifier")
+    print(
+        f" Data: fetch_openml('mnist_784'), digits {d0} vs {d1}, "
+        f"PCA {state_dim}-D, QDP amplitude (n={n})"
+    )
+    print(
+        f" Qubits: {args.qubits}, iters: {args.iters}, batch_size: {args.batch_size}, "
+        f"layers: {args.layers}, lr: {args.lr}"
+    )
+    print(
+        f" QDP encode: {encode_sec:.4f} s "
+        f"(train + test, {n_train} + {n - n_train} samples)"
+    )
+
+    results: list[dict[str, Any]] = []
+    early_stop = args.early_stop if args.early_stop > 0 else None
+    for t in range(args.trials):
+        r = run_training(
+            encoded_train,
+            encoded_test,
+            Y_train,
+            Y_test,
+            num_qubits=args.qubits,
+            num_layers=args.layers,
+            iterations=args.iters,
+            batch_size=args.batch_size,
+            lr=args.lr,
+            seed=args.seed + t,
+            early_stop_target=early_stop,
+        )
+        results.append(r)
+        print(f"\n Trial {t + 1}:")
+        print(f" QML device: {r.get('qml_device', 'cpu')}")
+        print(f" Compile: {r['compile_time_sec']:.4f} s")
+        print(f" Train: {r['train_time_sec']:.4f} s")
+        print(f" Train acc: {r['train_accuracy']:.4f} (n={r['n_train']})")
+        print(f" Test acc: {r['test_accuracy']:.4f} (n={r['n_test']})")
+        print(f" Throughput: {r['samples_per_sec']:.1f} samples/s")
+
+    if args.trials > 1:
+        test_accs = sorted(r["test_accuracy"] for r in results)
+        best = test_accs[-1]
+        # np.median averages the two middle values for an even number of
+        # trials; indexing at trials // 2 would report the upper-middle value.
+        median = float(np.median(test_accs))
+        print(
+            f"\n Best test accuracy: {best:.4f} (median: {median:.4f}, "
+            f"min: {test_accs[0]:.4f}, max: {test_accs[-1]:.4f})"
+        )
+        if best >= 0.95:
+            print(" → Target ≥0.95 achieved.")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/uv.lock b/uv.lock
index 678148684..36e21735a 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2076,12 +2076,8 @@ dev = [
[[package]]
name = "qumat-qdp"
source = { editable = "qdp/qdp-python" }
-dependencies = [
- { name = "qumat" },
-]
[package.metadata]
-requires-dist = [{ name = "qumat", editable = "." }]
[package.metadata.requires-dev]
benchmark = [