This is an automated email from the ASF dual-hosted git repository. andrewmusselman pushed a commit to branch pytorch-gpu-capability-check in repository https://gitbox.apache.org/repos/asf/mahout.git
commit 2f96816e0d36742146aa96ca15bf355f1e8e537f Author: Andrew Musselman <[email protected]> AuthorDate: Sun May 17 08:38:48 2026 -0700 fix(qdp): fall back to CPU when GPU arch not in PyTorch's compiled list --- qdp/qdp-python/qumat_qdp/api.py | 11 ++------ qdp/qdp-python/qumat_qdp/loader.py | 53 ++++++++++++++++++++++++++++++------ testing/qdp_python/test_torch_ref.py | 32 ++++++++++++++++++++-- 3 files changed, 76 insertions(+), 20 deletions(-) diff --git a/qdp/qdp-python/qumat_qdp/api.py b/qdp/qdp-python/qumat_qdp/api.py index 6493dd0f3..e1e8b20e3 100644 --- a/qdp/qdp-python/qumat_qdp/api.py +++ b/qdp/qdp-python/qumat_qdp/api.py @@ -189,17 +189,10 @@ class QdpBenchmark: def _run_throughput_pytorch(self) -> ThroughputResult: import torch + from qumat_qdp.loader import _select_torch_device from qumat_qdp.torch_ref import encode - if torch.cuda.is_available(): - if self._device_id < 0 or self._device_id >= torch.cuda.device_count(): - raise ValueError( - f"Invalid CUDA device_id {self._device_id}; " - f"{torch.cuda.device_count()} device(s) available." - ) - device = f"cuda:{self._device_id}" - else: - device = "cpu" + device = _select_torch_device(torch, self._device_id) # _validate() guarantees these are not None. assert self._num_qubits is not None assert self._total_batches is not None diff --git a/qdp/qdp-python/qumat_qdp/loader.py b/qdp/qdp-python/qumat_qdp/loader.py index a3443f1ba..34fae6a1a 100644 --- a/qdp/qdp-python/qumat_qdp/loader.py +++ b/qdp/qdp-python/qumat_qdp/loader.py @@ -75,6 +75,49 @@ _BACKEND_AUTO = "auto" _VALID_BACKENDS = frozenset({_BACKEND_RUST, _BACKEND_PYTORCH, _BACKEND_AUTO}) +def _select_torch_device(torch, device_id: int) -> str: + """Pick a torch device the current PyTorch build can actually use. + + ``torch.cuda.is_available()`` returns True whenever a usable driver and at + least one GPU are present, but does not check whether the GPU's compute + capability is in the PyTorch wheel's compiled arch list. Running on an + unsupported GPU surfaces as ``cudaErrorNoKernelImageForDevice`` the first + time a kernel launches -- a particularly opaque failure for users on + Pascal-and-earlier hardware where recent PyTorch wheels no longer ship + matching kernels. + + Intersect the device's capability with ``torch.cuda.get_arch_list()`` and + fall back to CPU (with a warning) when they don't match. Raises + ``ValueError`` on an out-of-range ``device_id`` to preserve the prior + contract for callers that explicitly request a specific GPU. + """ + if not torch.cuda.is_available(): + return "cpu" + + if device_id < 0 or device_id >= torch.cuda.device_count(): + raise ValueError( + f"Invalid CUDA device_id {device_id}; " + f"{torch.cuda.device_count()} device(s) available." + ) + + arch_list = torch.cuda.get_arch_list() + if arch_list: + major, minor = torch.cuda.get_device_capability(device_id) + device_arch = f"sm_{major}{minor}" + if device_arch not in arch_list: + warnings.warn( + f"GPU {device_id} ({torch.cuda.get_device_name(device_id)}, " + f"{device_arch}) is not in this PyTorch build's supported " + f"arch list ({sorted(arch_list)}). Falling back to CPU. " + "Install a PyTorch wheel that targets this GPU, or set " + "CUDA_VISIBLE_DEVICES= to silence this warning.", + stacklevel=2, + ) + return "cpu" + + return f"cuda:{device_id}" + + def _path_extension(path: str) -> str: """Return the lowercase extension of `path` (handling remote URLs/queries).""" is_remote = "://" in path @@ -478,15 +521,7 @@ class QuantumDataLoader: from qumat_qdp.torch_ref import encode - if torch.cuda.is_available(): - if self._device_id < 0 or self._device_id >= torch.cuda.device_count(): - raise ValueError( - f"Invalid CUDA device_id {self._device_id}; " - f"{torch.cuda.device_count()} device(s) available." - ) - device = f"cuda:{self._device_id}" - else: - device = "cpu" + device = _select_torch_device(torch, self._device_id) if use_synthetic: return self._pytorch_synthetic_iter(torch, encode, device) diff --git a/testing/qdp_python/test_torch_ref.py b/testing/qdp_python/test_torch_ref.py index c6c49883b..0fe3fffd1 100644 --- a/testing/qdp_python/test_torch_ref.py +++ b/testing/qdp_python/test_torch_ref.py @@ -36,6 +36,28 @@ from qumat_qdp.torch_ref import ( iqp_encode, ) + +def _torch_cuda_usable(device_id: int = 0) -> bool: + """True iff the current PyTorch build can launch kernels on ``device_id``. + + ``torch.cuda.is_available()`` alone is not enough: on GPUs whose compute + capability isn't in the wheel's compiled arch list (e.g. Pascal sm_61 + against a recent wheel that ships sm_70+), it returns True but every + kernel launch fails with ``cudaErrorNoKernelImageForDevice``. Mirror + ``qumat_qdp.loader._select_torch_device``'s capability check so the + GPU-only tests skip cleanly instead of erroring. + """ + if not torch.cuda.is_available(): + return False + if device_id < 0 or device_id >= torch.cuda.device_count(): + return False + arch_list = torch.cuda.get_arch_list() + if not arch_list: + return True + major, minor = torch.cuda.get_device_capability(device_id) + return f"sm_{major}{minor}" in arch_list + + # --------------------------------------------------------------------------- # Amplitude encoding # --------------------------------------------------------------------------- @@ -349,7 +371,10 @@ class TestDevicePlacement: result = amplitude_encode(data, num_qubits=2, device="cpu") assert result.device.type == "cpu" - @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + @pytest.mark.skipif( + not _torch_cuda_usable(), + reason="CUDA not available or GPU compute capability not supported by this PyTorch build", + ) def test_gpu_output(self): data = torch.randn(2, 4, dtype=torch.float64) result = amplitude_encode(data, num_qubits=2, device="cuda:0") @@ -369,7 +394,10 @@ class TestCrossValidation: pytest.importorskip("_qdp") @pytest.mark.gpu - @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") + @pytest.mark.skipif( + not _torch_cuda_usable(), + reason="CUDA not available or GPU compute capability not supported by this PyTorch build", + ) @pytest.mark.parametrize("encoding", ["amplitude", "angle", "basis", "iqp"]) def test_encoding_matches_rust(self, encoding): import _qdp
