Author: Florian Hahn Date: 2026-01-14T20:46:19Z New Revision: e5507c49f7fd184f598e8b62d6afe76365c530ad
URL: https://github.com/llvm/llvm-project/commit/e5507c49f7fd184f598e8b62d6afe76365c530ad DIFF: https://github.com/llvm/llvm-project/commit/e5507c49f7fd184f598e8b62d6afe76365c530ad.diff LOG: Revert "[SelDag] Use use BoolVT size when expanding find-last-active, if larg…" This reverts commit 68a04c1adae8dc2a031a1d483d2b73dbbe92f060. Added: Modified: llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp llvm/test/CodeGen/X86/vector-extract-last-active.ll Removed: ################################################################################ diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b71059a6ce354..64f73a4b5d699 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -9729,12 +9729,9 @@ SDValue TargetLowering::expandVectorFindLastActive(SDNode *N, if (MaskVT.isScalableVector()) VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - uint64_t EltWidth = TLI.getBitWidthForCttzElements( + unsigned EltWidth = TLI.getBitWidthForCttzElements( BoolVT.getTypeForEVT(*DAG.getContext()), MaskVT.getVectorElementCount(), /*ZeroIsPoison=*/true, &VScaleRange); - // If the step vector element type is smaller than the mask element type, - // use the mask type directly to avoid widening issues. 
- EltWidth = std::max(EltWidth, BoolVT.getFixedSizeInBits()); EVT StepVT = MVT::getIntegerVT(EltWidth); EVT StepVecVT = MaskVT.changeVectorElementType(*DAG.getContext(), StepVT); diff --git a/llvm/test/CodeGen/X86/vector-extract-last-active.ll b/llvm/test/CodeGen/X86/vector-extract-last-active.ll index e891b27de2756..09d305eaaeb77 100644 --- a/llvm/test/CodeGen/X86/vector-extract-last-active.ll +++ b/llvm/test/CodeGen/X86/vector-extract-last-active.ll @@ -8,28 +8,32 @@ define i32 @extract_last_active_v4i32(<4 x i32> %a, <4 x i1> %c) { ; CHECK-LABEL: extract_last_active_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3] -; CHECK-NEXT: movd %xmm2, %eax +; CHECK-NEXT: movd %xmm1, %eax ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] ; CHECK-NEXT: movd %xmm2, %ecx -; CHECK-NEXT: movd %xmm1, %edx ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] -; CHECK-NEXT: movd %xmm2, %esi +; CHECK-NEXT: movd %xmm2, %edx +; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,3,3,3] +; CHECK-NEXT: movd %xmm1, %esi ; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; CHECK-NEXT: movd %xmm0, %edi -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] -; CHECK-NEXT: movd %xmm0, %r8d -; CHECK-NEXT: cmpl %edi, %r8d -; CHECK-NEXT: cmoval %r8d, %edi -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3] -; CHECK-NEXT: movd %xmm0, %r8d -; CHECK-NEXT: cmpl %r8d, %edi +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: setne %dil +; CHECK-NEXT: leal (%rdi,%rdi,2), %r8d +; CHECK-NEXT: xorl %r9d, %r9d +; CHECK-NEXT: testl %edx, %edx +; CHECK-NEXT: setne %r9b +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: setne %dil +; CHECK-NEXT: addl %edi, %edi +; CHECK-NEXT: cmpb %dil, %r9b +; CHECK-NEXT: cmoval %r9d, %edi +; CHECK-NEXT: cmpb %r8b, %dil ; CHECK-NEXT: cmovbel %r8d, %edi -; CHECK-NEXT: orl %edx, %esi -; CHECK-NEXT: orl 
%eax, %ecx +; CHECK-NEXT: orl %edx, %eax ; CHECK-NEXT: orl %esi, %ecx +; CHECK-NEXT: orl %eax, %ecx ; CHECK-NEXT: andb $1, %cl ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpb $1, %cl @@ -44,18 +48,28 @@ define i32 @extract_last_active_v4i32_no_default(<4 x i32> %a, <4 x i1> %c) { ; CHECK-LABEL: extract_last_active_v4i32_no_default: ; CHECK: # %bb.0: ; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3] ; CHECK-NEXT: movd %xmm0, %eax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: setne %cl +; CHECK-NEXT: leal (%rcx,%rcx,2), %eax ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] ; CHECK-NEXT: movd %xmm0, %ecx -; CHECK-NEXT: cmpl %eax, %ecx -; CHECK-NEXT: cmoval %ecx, %eax -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3] +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: setne %dl +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] ; CHECK-NEXT: movd %xmm0, %ecx -; CHECK-NEXT: cmpl %ecx, %eax -; CHECK-NEXT: cmovbel %ecx, %eax -; CHECK-NEXT: movl -24(%rsp,%rax,4), %eax +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: setne %sil +; CHECK-NEXT: addl %esi, %esi +; CHECK-NEXT: cmpb %sil, %dl +; CHECK-NEXT: cmoval %edx, %esi +; CHECK-NEXT: cmpb %al, %sil +; CHECK-NEXT: cmovbel %eax, %esi +; CHECK-NEXT: movl -24(%rsp,%rsi,4), %eax ; CHECK-NEXT: retq %res = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> %a, <4 x i1> %c, i32 poison) ret i32 %res @@ -67,15 +81,18 @@ define i32 @extract_last_active_v2i32(<2 x i32> %a, <2 x i1> %c) { ; CHECK: # %bb.0: ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3] ; CHECK-NEXT: movq %xmm2, %rcx -; CHECK-NEXT: movq %xmm1, %rdx +; CHECK-NEXT: movq %xmm1, %rax ; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: orl %ecx, %edx +; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: 
orl %eax, %edx ; CHECK-NEXT: andb $1, %dl ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpb $1, %dl ; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: andl $1, %ecx -; CHECK-NEXT: orl -24(%rsp,%rcx,4), %eax +; CHECK-NEXT: xorl %edx, %edx +; CHECK-NEXT: testq %rcx, %rcx +; CHECK-NEXT: setne %dl +; CHECK-NEXT: orl -24(%rsp,%rdx,4), %eax ; CHECK-NEXT: retq %res = call i32 @llvm.experimental.vector.extract.last.active.v2i32(<2 x i32> %a, <2 x i1> %c, i32 -1) ret i32 %res @@ -86,24 +103,23 @@ define i32 @extract_last_active_v2i32(<2 x i32> %a, <2 x i1> %c) { define i32 @extract_last_active_v3i32(<3 x i32> %a, <3 x i1> %c) { ; CHECK-LABEL: extract_last_active_v3i32: ; CHECK: # %bb.0: -; CHECK-NEXT: movd %edx, %xmm1 -; CHECK-NEXT: movd %esi, %xmm2 -; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,1,1] -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] -; CHECK-NEXT: movd %xmm0, %ecx -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1] -; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: cmpl %ecx, %eax -; CHECK-NEXT: cmoval %eax, %ecx ; CHECK-NEXT: orl %esi, %edi ; CHECK-NEXT: orl %edx, %edi ; CHECK-NEXT: andb $1, %dil ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpb $1, %dil ; CHECK-NEXT: sbbl %eax, %eax +; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: setne %cl +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: testl %edx, %edx +; CHECK-NEXT: setne %sil +; CHECK-NEXT: addl %esi, %esi +; CHECK-NEXT: cmpb %sil, %cl +; CHECK-NEXT: cmoval %ecx, %esi +; CHECK-NEXT: movzbl %sil, %ecx ; CHECK-NEXT: orl -24(%rsp,%rcx,4), %eax ; CHECK-NEXT: retq %res = call i32 @llvm.experimental.vector.extract.last.active.v3i32(<3 x i32> %a, <3 x i1> %c, i32 -1) @@ -114,53 +130,75 @@ define i32 @extract_last_active_v3i32(<3 x i32> %a, <3 x i1> %c) { define i32 @extract_last_active_v8i32(<8 x i32> %a, <8 x i1> %c) { ; 
CHECK-LABEL: extract_last_active_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: .cfi_offset %rbx, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movd %xmm2, %edi ; CHECK-NEXT: pextrw $7, %xmm2, %eax -; CHECK-NEXT: pextrw $6, %xmm2, %esi +; CHECK-NEXT: pextrw $6, %xmm2, %edx ; CHECK-NEXT: pextrw $5, %xmm2, %r8d ; CHECK-NEXT: pextrw $4, %xmm2, %ecx +; CHECK-NEXT: pextrw $2, %xmm2, %esi +; CHECK-NEXT: pextrw $1, %xmm2, %r10d ; CHECK-NEXT: pextrw $3, %xmm2, %r9d -; CHECK-NEXT: pextrw $2, %xmm2, %edi -; CHECK-NEXT: pextrw $1, %xmm2, %r11d -; CHECK-NEXT: movd %xmm2, %r10d ; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; CHECK-NEXT: pextrw $2, %xmm2, %edx -; CHECK-NEXT: pextrw $1, %xmm2, %ebx -; CHECK-NEXT: cmpw %dx, %bx -; CHECK-NEXT: cmoval %ebx, %edx -; CHECK-NEXT: pextrw $3, %xmm2, %ebx -; CHECK-NEXT: cmpw %bx, %dx -; CHECK-NEXT: cmovbel %ebx, %edx -; CHECK-NEXT: pextrw $4, %xmm2, %ebx -; CHECK-NEXT: cmpw %bx, %dx -; CHECK-NEXT: cmovbel %ebx, %edx -; CHECK-NEXT: pextrw $5, %xmm2, %ebx -; CHECK-NEXT: cmpw %bx, %dx -; CHECK-NEXT: cmovbel %ebx, %edx -; CHECK-NEXT: pextrw $6, %xmm2, %ebx -; CHECK-NEXT: cmpw %bx, %dx -; CHECK-NEXT: cmovbel %ebx, %edx -; CHECK-NEXT: pextrw $7, %xmm2, %ebx -; CHECK-NEXT: cmpw %bx, %dx -; CHECK-NEXT: cmovbel %ebx, %edx -; CHECK-NEXT: andl $7, %edx -; CHECK-NEXT: orl %r11d, %r10d -; CHECK-NEXT: orl %r9d, %edi +; CHECK-NEXT: xorl %r11d, %r11d +; CHECK-NEXT: testl %r9d, %r9d +; CHECK-NEXT: setne %r11b +; CHECK-NEXT: leal (%r11,%r11,2), %r11d +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: testl %r10d, %r10d +; CHECK-NEXT: setne %bl +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: setne %bpl +; CHECK-NEXT: addl %ebp, %ebp +; 
CHECK-NEXT: cmpb %bpl, %bl +; CHECK-NEXT: cmoval %ebx, %ebp +; CHECK-NEXT: cmpb %r11b, %bpl +; CHECK-NEXT: cmovbel %r11d, %ebp +; CHECK-NEXT: xorl %r11d, %r11d +; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: setne %r11b +; CHECK-NEXT: shll $2, %r11d +; CHECK-NEXT: cmpb %r11b, %bpl +; CHECK-NEXT: cmoval %ebp, %r11d +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: testl %r8d, %r8d +; CHECK-NEXT: setne %bl +; CHECK-NEXT: leal (%rbx,%rbx,4), %ebx +; CHECK-NEXT: cmpb %bl, %r11b +; CHECK-NEXT: cmovbel %ebx, %r11d +; CHECK-NEXT: testl %edx, %edx +; CHECK-NEXT: movl $6, %ebx +; CHECK-NEXT: cmovel %edx, %ebx +; CHECK-NEXT: cmpb %bl, %r11b +; CHECK-NEXT: cmoval %r11d, %ebx +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: movl $7, %r11d +; CHECK-NEXT: cmovel %eax, %r11d +; CHECK-NEXT: cmpb %r11b, %bl +; CHECK-NEXT: cmoval %ebx, %r11d +; CHECK-NEXT: andl $7, %r11d ; CHECK-NEXT: orl %r10d, %edi +; CHECK-NEXT: orl %r9d, %esi +; CHECK-NEXT: orl %edi, %esi ; CHECK-NEXT: orl %r8d, %ecx +; CHECK-NEXT: orl %edx, %ecx ; CHECK-NEXT: orl %esi, %ecx -; CHECK-NEXT: orl %edi, %ecx ; CHECK-NEXT: orl %eax, %ecx ; CHECK-NEXT: andb $1, %cl ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpb $1, %cl ; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: orl -32(%rsp,%rdx,4), %eax +; CHECK-NEXT: orl -40(%rsp,%r11,4), %eax ; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq %res = call i32 @llvm.experimental.vector.extract.last.active.v8i32(<8 x i32> %a, <8 x i1> %c, i32 -1) @@ -247,27 +285,3 @@ define i32 @extract_last_active_v16i32(<16 x i32> %a, <16 x i1> %c) { %res = call i32 @llvm.experimental.vector.extract.last.active.v16i32(<16 x i32> %a, <16 x i1> %c, i32 -1) ret i32 %res } - -; Test for older x86 CPUs (pre-AVX) where the mask type legalization creates -; larger element types, requiring the step vector type adjustment. 
-define i32 @extract_last_active_v4i32_penryn(<4 x i32> %a, <4 x i1> %c) "target-cpu"="penryn" { -; CHECK-LABEL: extract_last_active_v4i32_penryn: -; CHECK: # %bb.0: -; CHECK-NEXT: movaps %xmm0, %xmm2 -; CHECK-NEXT: xorps %xmm3, %xmm3 -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: blendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 -; CHECK-NEXT: extractps $2, %xmm3, %eax -; CHECK-NEXT: extractps $1, %xmm3, %ecx -; CHECK-NEXT: cmpl %eax, %ecx -; CHECK-NEXT: cmoval %ecx, %eax -; CHECK-NEXT: extractps $3, %xmm3, %ecx -; CHECK-NEXT: cmpl %ecx, %eax -; CHECK-NEXT: cmovbel %ecx, %eax -; CHECK-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: shll $2, %eax -; CHECK-NEXT: movl -24(%rsp,%rax), %eax -; CHECK-NEXT: retq - %res = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> %a, <4 x i1> %c, i32 poison) - ret i32 %res -} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
