Author: David Sherwood
Date: 2022-02-22T11:13:38-08:00
New Revision: 8c33ea3ab0ef304e6f43be31e9c72660a6261bf7

URL: https://github.com/llvm/llvm-project/commit/8c33ea3ab0ef304e6f43be31e9c72660a6261bf7
DIFF: https://github.com/llvm/llvm-project/commit/8c33ea3ab0ef304e6f43be31e9c72660a6261bf7.diff

LOG: [SVE][CodeGen] Bail out for scalable vectors in AArch64TargetLowering::ReconstructShuffle

Previously the code in AArch64TargetLowering::ReconstructShuffle assumed
the input vectors were always fixed-width. However, this is not always the
case, since you can extract elements from scalable vectors and insert them
into fixed-width ones. We were hitting crashes here for two different cases:

1. When lowering a fixed-length vector extract from a scalable vector
   with i1 element types. This happens because the i1 elements get
   promoted to larger integer types for fixed-width vectors, which leads
   to sequences of INSERT_VECTOR_ELT and EXTRACT_VECTOR_ELT nodes. In this
   case AArch64TargetLowering::ReconstructShuffle will still fail to make
   a transformation, but at least it no longer crashes.
2. When lowering a sequence of extractelement/insertelement operations
   on mixed fixed-width/scalable vectors.

For now, I've just changed AArch64TargetLowering::ReconstructShuffle to
bail out if it finds a scalable vector.

Tests for both instances described above have been added here:

(1) CodeGen/AArch64/sve-extract-fixed-vector.ll
(2) CodeGen/AArch64/sve-fixed-length-reshuffle.ll

Differential Revision: https://reviews.llvm.org/D116602

(cherry picked from commit a57a7f3de551bd0ae4e27b1f0c85437cd3e2e834)
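
To make case (2) above concrete, the pattern looks roughly like the IR
sketch below. This is only illustrative and is not taken from the patch:
the added reshuffle test uses i1 elements, and the i32 element type and
function name here are assumptions. Extracting lanes from a scalable
vector and inserting them into a fixed-width one is the kind of mixed
sequence the log message describes:

target triple = "aarch64-unknown-linux-gnu"

define <4 x i32> @reshuffle_sketch(<vscale x 4 x i32> %a) #0 {
  ; Extract individual lanes from a scalable input vector.
  %e0 = extractelement <vscale x 4 x i32> %a, i32 0
  %e1 = extractelement <vscale x 4 x i32> %a, i32 1
  %e2 = extractelement <vscale x 4 x i32> %a, i32 2
  %e3 = extractelement <vscale x 4 x i32> %a, i32 3
  ; Rebuild a fixed-width vector from those lanes; lowering this kind of
  ; extractelement/insertelement chain is the scenario addressed above.
  %v0 = insertelement <4 x i32> undef, i32 %e0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
  ret <4 x i32> %v3
}

attributes #0 = { "target-features"="+sve" }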

Added: 
    llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 792e45bdb0dd..aa2dca3eda4b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8990,12 +8990,13 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
     if (V.isUndef())
       continue;
     else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-             !isa<ConstantSDNode>(V.getOperand(1))) {
+             !isa<ConstantSDNode>(V.getOperand(1)) ||
+             V.getOperand(0).getValueType().isScalableVector()) {
       LLVM_DEBUG(
           dbgs() << "Reshuffle failed: "
                     "a shuffle can only come from building a vector from "
-                    "various elements of other vectors, provided their "
-                    "indices are constant\n");
+                    "various elements of other fixed-width vectors, provided "
+                    "their indices are constant\n");
       return SDValue();
     }
 
@@ -9039,8 +9040,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
   for (auto &Src : Sources) {
     EVT SrcVT = Src.ShuffleVec.getValueType();
 
-    uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
-    if (SrcVTSize == VTSize)
+    TypeSize SrcVTSize = SrcVT.getSizeInBits();
+    if (SrcVTSize == TypeSize::Fixed(VTSize))
       continue;
 
     // This stage of the search produces a source with the same element type as
@@ -9049,7 +9050,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
     unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
     EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
 
-    if (SrcVTSize < VTSize) {
+    if (SrcVTSize.getFixedValue() < VTSize) {
       assert(2 * SrcVTSize == VTSize);
       // We can pad out the smaller vector for free, so if it's part of a
       // shuffle...
@@ -9059,7 +9060,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
       continue;
     }
 
-    if (SrcVTSize != 2 * VTSize) {
+    if (SrcVTSize.getFixedValue() != 2 * VTSize) {
       LLVM_DEBUG(
           dbgs() << "Reshuffle failed: result vector too small to extract\n");
       return SDValue();

diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
index 3be4b94dedd2..a3d161ec4102 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -361,6 +361,106 @@ define <16 x i8> @extract_v16i8_nxv2i8_idx16(<vscale x 2 x i8> %vec) nounwind #1
   ret <16 x i8> %retval
 }
 
+
+; Predicates
+
+define <2 x i1> @extract_v2i1_nxv2i1(<vscale x 2 x i1> %inmask) {
+; CHECK-LABEL: extract_v2i1_nxv2i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    mov x8, v0.d[1]
+; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %mask = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %inmask, i64 0)
+  ret <2 x i1> %mask
+}
+
+define <4 x i1> @extract_v4i1_nxv4i1(<vscale x 4 x i1> %inmask) {
+; CHECK-LABEL: extract_v4i1_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    mov w8, v1.s[1]
+; CHECK-NEXT:    mov w9, v1.s[2]
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    mov v0.h[1], w8
+; CHECK-NEXT:    mov w8, v1.s[3]
+; CHECK-NEXT:    mov v0.h[2], w9
+; CHECK-NEXT:    mov v0.h[3], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %mask = call <4 x i1> @llvm.experimental.vector.extract.v4i1.nxv4i1(<vscale x 4 x i1> %inmask, i64 0)
+  ret <4 x i1> %mask
+}
+
+define <8 x i1> @extract_v8i1_nxv8i1(<vscale x 8 x i1> %inmask) {
+; CHECK-LABEL: extract_v8i1_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.h, p0/z, #1 // =0x1
+; CHECK-NEXT:    umov w8, v1.h[1]
+; CHECK-NEXT:    umov w9, v1.h[2]
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    mov v0.b[1], w8
+; CHECK-NEXT:    umov w8, v1.h[3]
+; CHECK-NEXT:    mov v0.b[2], w9
+; CHECK-NEXT:    umov w9, v1.h[4]
+; CHECK-NEXT:    mov v0.b[3], w8
+; CHECK-NEXT:    umov w8, v1.h[5]
+; CHECK-NEXT:    mov v0.b[4], w9
+; CHECK-NEXT:    umov w9, v1.h[6]
+; CHECK-NEXT:    mov v0.b[5], w8
+; CHECK-NEXT:    umov w8, v1.h[7]
+; CHECK-NEXT:    mov v0.b[6], w9
+; CHECK-NEXT:    mov v0.b[7], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %mask = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> %inmask, i64 0)
+  ret <8 x i1> %mask
+}
+
+define <16 x i1> @extract_v16i1_nxv16i1(<vscale x 16 x i1> %inmask) {
+; CHECK-LABEL: extract_v16i1_nxv16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.b, p0/z, #1 // =0x1
+; CHECK-NEXT:    umov w8, v1.b[1]
+; CHECK-NEXT:    umov w9, v1.b[2]
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    mov v0.b[1], w8
+; CHECK-NEXT:    umov w8, v1.b[3]
+; CHECK-NEXT:    mov v0.b[2], w9
+; CHECK-NEXT:    umov w9, v1.b[4]
+; CHECK-NEXT:    mov v0.b[3], w8
+; CHECK-NEXT:    umov w8, v1.b[5]
+; CHECK-NEXT:    mov v0.b[4], w9
+; CHECK-NEXT:    umov w9, v1.b[6]
+; CHECK-NEXT:    mov v0.b[5], w8
+; CHECK-NEXT:    umov w8, v1.b[7]
+; CHECK-NEXT:    mov v0.b[6], w9
+; CHECK-NEXT:    umov w9, v1.b[8]
+; CHECK-NEXT:    mov v0.b[7], w8
+; CHECK-NEXT:    umov w8, v1.b[9]
+; CHECK-NEXT:    mov v0.b[8], w9
+; CHECK-NEXT:    umov w9, v1.b[10]
+; CHECK-NEXT:    mov v0.b[9], w8
+; CHECK-NEXT:    umov w8, v1.b[11]
+; CHECK-NEXT:    mov v0.b[10], w9
+; CHECK-NEXT:    umov w9, v1.b[12]
+; CHECK-NEXT:    mov v0.b[11], w8
+; CHECK-NEXT:    umov w8, v1.b[13]
+; CHECK-NEXT:    mov v0.b[12], w9
+; CHECK-NEXT:    umov w9, v1.b[14]
+; CHECK-NEXT:    mov v0.b[13], w8
+; CHECK-NEXT:    umov w8, v1.b[15]
+; CHECK-NEXT:    mov v0.b[14], w9
+; CHECK-NEXT:    mov v0.b[15], w8
+; CHECK-NEXT:    ret
+  %mask = call <16 x i1> @llvm.experimental.vector.extract.v16i1.nxv16i1(<vscale x 16 x i1> %inmask, i64 0)
+  ret <16 x i1> %mask
+}
+
+
 ; Fixed length clamping
 
 define <2 x i64> @extract_fixed_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind #0 {
@@ -441,4 +541,9 @@ declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv8i8(<vscale x 8 x i
 declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv4i8(<vscale x 4 x i8>, i64)
 declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8>, i64)
 
+declare <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1>, i64)
+declare <4 x i1> @llvm.experimental.vector.extract.v4i1.nxv4i1(<vscale x 4 x i1>, i64)
+declare <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1>, i64)
+declare <16 x i1> @llvm.experimental.vector.extract.v16i1.nxv16i1(<vscale x 16 x i1>, i64)
+
 declare <4 x i64> @llvm.experimental.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64>, i64)

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
new file mode 100644
index 000000000000..c826f7337b18
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; == Matching first N elements ==
+
+define <4 x i1> @reshuffle_v4i1_nxv4i1(<vscale x 4 x i1> %a) #0 {
+; CHECK-LABEL: reshuffle_v4i1_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    mov w8, v1.s[1]
+; CHECK-NEXT:    mov w9, v1.s[2]
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    mov v0.h[1], w8
+; CHECK-NEXT:    mov w8, v1.s[3]
+; CHECK-NEXT:    mov v0.h[2], w9
+; CHECK-NEXT:    mov v0.h[3], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %el0 = extractelement <vscale x 4 x i1> %a, i32 0
+  %el1 = extractelement <vscale x 4 x i1> %a, i32 1
+  %el2 = extractelement <vscale x 4 x i1> %a, i32 2
+  %el3 = extractelement <vscale x 4 x i1> %a, i32 3
+  %v0 = insertelement <4 x i1> undef, i1 %el0, i32 0
+  %v1 = insertelement <4 x i1> %v0, i1 %el1, i32 1
+  %v2 = insertelement <4 x i1> %v1, i1 %el2, i32 2
+  %v3 = insertelement <4 x i1> %v2, i1 %el3, i32 3
+  ret <4 x i1> %v3
+}
+
+attributes #0 = { "target-features"="+sve" }

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
