https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/154100
Fixes #153808 >From 8db58512f56eaadb18cb97129386b0a7b6695266 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Mon, 18 Aug 2025 13:37:55 +0900 Subject: [PATCH] DAG: Avoid creating illegal extract_subvector in legalizer Fixes #153808 --- .../SelectionDAG/LegalizeVectorTypes.cpp | 27 ++++++++-- .../issue153808-extract-subvector-legalize.ll | 51 +++++++++++++++++++ 2 files changed, 74 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/issue153808-extract-subvector-legalize.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index bc2dbfb4cbaae..a252d911a1d4d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3842,13 +3842,32 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { uint64_t LoEltsMin = Lo.getValueType().getVectorMinNumElements(); uint64_t IdxVal = Idx->getAsZExtVal(); + unsigned NumResultElts = SubVT.getVectorMinNumElements(); + if (IdxVal < LoEltsMin) { - assert(IdxVal + SubVT.getVectorMinNumElements() <= LoEltsMin && + assert(IdxVal + NumResultElts <= LoEltsMin && "Extracted subvector crosses vector split!"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); - } else if (SubVT.isScalableVector() == - N->getOperand(0).getValueType().isScalableVector()) - return DAG.getExtractSubvector(dl, SubVT, Hi, IdxVal - LoEltsMin); + } + + EVT SrcVT = N->getOperand(0).getValueType(); + if (SubVT.isScalableVector() == SrcVT.isScalableVector()) { + uint64_t ExtractIdx = IdxVal - LoEltsMin; + if (ExtractIdx % NumResultElts == 0) + return DAG.getExtractSubvector(dl, SubVT, Hi, ExtractIdx); + + // We cannot create an extract_subvector that isn't a multiple of the result + // size, which may go out of bounds for the last elements. Shuffle the + // desired elements down to 0 and do a simple 0 extract. + EVT HiVT = Hi.getValueType(); + SmallVector<int, 8> Mask(HiVT.getVectorNumElements(), -1); + for (int I = 0; I != static_cast<int>(NumResultElts); ++I) + Mask[I] = ExtractIdx + I; + + SDValue Shuffle = + DAG.getVectorShuffle(HiVT, dl, Hi, DAG.getPOISON(HiVT), Mask); + return DAG.getExtractSubvector(dl, SubVT, Shuffle, 0); + } // After this point the DAG node only permits extracting fixed-width // subvectors from scalable vectors. diff --git a/llvm/test/CodeGen/AMDGPU/issue153808-extract-subvector-legalize.ll b/llvm/test/CodeGen/AMDGPU/issue153808-extract-subvector-legalize.ll new file mode 100644 index 0000000000000..f1b1ea3fbd6d7 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/issue153808-extract-subvector-legalize.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s + +define <3 x float> @issue153808_vector_extract_assert(ptr addrspace(1) %ptr) #0 { +; GFX900-LABEL: issue153808_vector_extract_assert: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: v_mov_b32_e32 v4, v1 +; GFX900-NEXT: v_mov_b32_e32 v3, v0 +; GFX900-NEXT: global_load_dwordx4 v[5:8], v[3:4], off +; GFX900-NEXT: global_load_dwordx3 v[0:2], v[3:4], off offset:192 +; GFX900-NEXT: s_mov_b32 s4, 0 +; GFX900-NEXT: s_mov_b32 s5, s4 +; GFX900-NEXT: s_mov_b32 s6, s4 +; GFX900-NEXT: s_mov_b32 s7, s4 +; GFX900-NEXT: s_waitcnt vmcnt(1) +; GFX900-NEXT: buffer_store_dwordx4 v[5:8], off, s[4:7], 0 +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX942-LABEL: issue153808_vector_extract_assert: +; GFX942: ; %bb.0: +; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: global_load_dwordx4 v[6:9], v[0:1], off +; GFX942-NEXT: global_load_dwordx3 v[2:4], v[0:1], off offset:192 +; GFX942-NEXT: s_mov_b32 s0, 0 +; GFX942-NEXT: s_mov_b32 s1, s0 +; GFX942-NEXT: s_mov_b32 s2, s0 +; GFX942-NEXT: s_mov_b32 s3, s0 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 +; GFX942-NEXT: s_waitcnt vmcnt(1) +; GFX942-NEXT: v_mov_b32_e32 v0, v2 +; GFX942-NEXT: v_mov_b32_e32 v1, v3 +; GFX942-NEXT: v_mov_b32_e32 v2, v4 +; GFX942-NEXT: s_waitcnt vmcnt(0) +; GFX942-NEXT: s_setpc_b64 s[30:31] + %val = load <51 x float>, ptr addrspace(1) %ptr, align 4 + %val.slice.0 = shufflevector <51 x float> %val, <51 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %val.slice.0, ptr addrspace(8) null, i32 0, i32 0, i32 0) + %val.slice.48 = shufflevector <51 x float> %val, <51 x float> poison, <3 x i32> <i32 48, i32 49, i32 50> + ret <3 x float> %val.slice.48 +} + +declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8) writeonly captures(none), i32, i32, i32 immarg) #1 + +attributes #0 = { nounwind } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX9: {{.*}} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits