Author: Simon Pilgrim
Date: 2026-01-07T22:02:45Z
New Revision: 59188e19250b1797ae7e070de5e1e1ed0c388521
URL: https://github.com/llvm/llvm-project/commit/59188e19250b1797ae7e070de5e1e1ed0c388521
DIFF: https://github.com/llvm/llvm-project/commit/59188e19250b1797ae7e070de5e1e1ed0c388521.diff

LOG: Revert "[X86] Allow EVEX compression for mask registers (#171980)"

This reverts commit 1caf2704dd6791baa4b958d6a666ea64ec24795d.

Added: 


Modified: 
    llvm/lib/Target/X86/X86CompressEVEX.cpp
    llvm/test/CodeGen/X86/avx512-ext.ll
    llvm/test/CodeGen/X86/avx512-insert-extract.ll
    llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
    llvm/test/CodeGen/X86/masked_compressstore.ll
    llvm/test/CodeGen/X86/masked_expandload.ll
    llvm/test/CodeGen/X86/masked_gather_scatter.ll
    llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
    llvm/test/CodeGen/X86/masked_load.ll
    llvm/test/CodeGen/X86/masked_store.ll
    llvm/test/CodeGen/X86/pr77459.ll
    llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
    llvm/test/CodeGen/X86/vector-shuffle-v1.ll

Removed: 


################################################################################
diff --git a/llvm/lib/Target/X86/X86CompressEVEX.cpp b/llvm/lib/Target/X86/X86CompressEVEX.cpp
index fd79772dcb7b1..59d653b84eb8a 100644
--- a/llvm/lib/Target/X86/X86CompressEVEX.cpp
+++ b/llvm/lib/Target/X86/X86CompressEVEX.cpp
@@ -16,7 +16,6 @@
 // d. NF_ND (EVEX) -> NF (EVEX)
 // e. NonNF (EVEX) -> NF (EVEX)
 // f. SETZUCCm (EVEX) -> SETCCm (legacy)
-// g. VPMOV*2M (EVEX) + KMOV -> VMOVMSK/VPMOVMSKB (VEX)
 //
 // Compression a, b and c can always reduce code size, with some exceptions
 // such as promoted 16-bit CRC32 which is as long as the legacy version.
@@ -42,7 +41,6 @@
 #include "X86.h"
 #include "X86InstrInfo.h"
 #include "X86Subtarget.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionAnalysisManager.h"
@@ -180,137 +178,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
   return true;
 }
 
-static bool isKMovNarrowing(unsigned VPMOVOpc, unsigned KMOVOpc) {
-  unsigned VPMOVBits = 0;
-  switch (VPMOVOpc) {
-  case X86::VPMOVQ2MZ128kr:
-    VPMOVBits = 2;
-    break;
-  case X86::VPMOVQ2MZ256kr:
-  case X86::VPMOVD2MZ128kr:
-    VPMOVBits = 4;
-    break;
-  case X86::VPMOVD2MZ256kr:
-    VPMOVBits = 8;
-    break;
-  case X86::VPMOVB2MZ128kr:
-    VPMOVBits = 16;
-    break;
-  case X86::VPMOVB2MZ256kr:
-    VPMOVBits = 32;
-    break;
-  default:
-    llvm_unreachable("Unknown VPMOV opcode");
-  }
-
-  unsigned KMOVSize = 0;
-  switch (KMOVOpc) {
-  case X86::KMOVBrk:
-    KMOVSize = 8;
-    break;
-  case X86::KMOVWrk:
-    KMOVSize = 16;
-    break;
-  case X86::KMOVDrk:
-    KMOVSize = 32;
-    break;
-  default:
-    llvm_unreachable("Unknown KMOV opcode");
-  }
-
-  return KMOVSize < VPMOVBits;
-}
-
-// Try to compress VPMOV*2M + KMOV chain patterns:
-//   vpmov*2m %xmm0, %k0  ->  (erase this)
-//   kmov* %k0, %eax      ->  vmovmskp* %xmm0, %eax
-static bool tryCompressVPMOVPattern(MachineInstr &MI, MachineBasicBlock &MBB,
-                                    const X86Subtarget &ST,
-                                    SmallVectorImpl<MachineInstr *> &ToErase) {
-  const X86InstrInfo *TII = ST.getInstrInfo();
-  const TargetRegisterInfo *TRI = ST.getRegisterInfo();
-
-  unsigned Opc = MI.getOpcode();
-  if (Opc != X86::VPMOVD2MZ128kr && Opc != X86::VPMOVD2MZ256kr &&
-      Opc != X86::VPMOVQ2MZ128kr && Opc != X86::VPMOVQ2MZ256kr &&
-      Opc != X86::VPMOVB2MZ128kr && Opc != X86::VPMOVB2MZ256kr)
-    return false;
-
-  Register MaskReg = MI.getOperand(0).getReg();
-  Register SrcVecReg = MI.getOperand(1).getReg();
-
-  unsigned MovMskOpc = 0;
-  switch (Opc) {
-  case X86::VPMOVD2MZ128kr:
-    MovMskOpc = X86::VMOVMSKPSrr;
-    break;
-  case X86::VPMOVD2MZ256kr:
-    MovMskOpc = X86::VMOVMSKPSYrr;
-    break;
-  case X86::VPMOVQ2MZ128kr:
-    MovMskOpc = X86::VMOVMSKPDrr;
-    break;
-  case X86::VPMOVQ2MZ256kr:
-    MovMskOpc = X86::VMOVMSKPDYrr;
-    break;
-  case X86::VPMOVB2MZ128kr:
-    MovMskOpc = X86::VPMOVMSKBrr;
-    break;
-  case X86::VPMOVB2MZ256kr:
-    MovMskOpc = X86::VPMOVMSKBYrr;
-    break;
-  default:
-    llvm_unreachable("Unknown VPMOV opcode");
-  }
-
-  MachineInstr *KMovMI = nullptr;
-
-  for (MachineInstr &CurMI : llvm::make_range(
-           std::next(MachineBasicBlock::iterator(MI)), MBB.end())) {
-    if (CurMI.modifiesRegister(MaskReg, TRI)) {
-      if (!KMovMI)
-        return false; // Mask clobbered before use
-      break;
-    }
-
-    if (CurMI.readsRegister(MaskReg, TRI)) {
-      if (KMovMI)
-        return false; // Fail: Mask has MULTIPLE uses
-
-      unsigned UseOpc = CurMI.getOpcode();
-      bool IsKMOV = UseOpc == X86::KMOVBrk || UseOpc == X86::KMOVWrk ||
-                    UseOpc == X86::KMOVDrk;
-      // Only allow non-narrowing KMOV uses of the mask.
-      if (IsKMOV && CurMI.getOperand(1).getReg() == MaskReg &&
-          !isKMovNarrowing(Opc, UseOpc)) {
-        KMovMI = &CurMI;
-        // continue scanning to ensure
-        // there are no *other* uses of the mask later in the block.
-      } else {
-        return false;
-      }
-    }
-
-    if (!KMovMI && CurMI.modifiesRegister(SrcVecReg, TRI)) {
-      return false; // SrcVecReg modified before it could be used by MOVMSK
-    }
-  }
-
-  if (!KMovMI)
-    return false;
-
-  // Apply the transformation
-  KMovMI->setDesc(TII->get(MovMskOpc));
-  KMovMI->getOperand(1).setReg(SrcVecReg);
-  KMovMI->setAsmPrinterFlag(X86::AC_EVEX_2_VEX);
-
-  ToErase.push_back(&MI);
-  return true;
-}
-
 static bool CompressEVEXImpl(MachineInstr &MI, MachineBasicBlock &MBB,
-                             const X86Subtarget &ST,
-                             SmallVectorImpl<MachineInstr *> &ToErase) {
+                             const X86Subtarget &ST) {
   uint64_t TSFlags = MI.getDesc().TSFlags;
 
   // Check for EVEX instructions only.
@@ -321,10 +190,6 @@ static bool CompressEVEXImpl(MachineInstr &MI, MachineBasicBlock &MBB,
   if (TSFlags & (X86II::EVEX_K | X86II::EVEX_L2))
     return false;
 
-  // Specialized VPMOVD2M + KMOV -> MOVMSK fold first.
-  if (tryCompressVPMOVPattern(MI, MBB, ST, ToErase))
-    return true;
-
   auto IsRedundantNewDataDest = [&](unsigned &Opc) {
     // $rbx = ADD64rr_ND $rbx, $rax / $rbx = ADD64rr_ND $rax, $rbx
     // ->
@@ -485,15 +350,9 @@ static bool runOnMF(MachineFunction &MF) {
   bool Changed = false;
 
   for (MachineBasicBlock &MBB : MF) {
-    SmallVector<MachineInstr *, 4> ToErase;
-
-    for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
-      Changed |= CompressEVEXImpl(MI, MBB, ST, ToErase);
-    }
-
-    for (MachineInstr *MI : ToErase) {
-      MI->eraseFromParent();
-    }
+    // Traverse the basic block.
+    for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
+      Changed |= CompressEVEXImpl(MI, MBB, ST);
   }
   LLVM_DEBUG(dbgs() << "End X86CompressEVEXPass\n";);
   return Changed;
diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll
index 2617e2d12adfd..1a712ffac5b7e 100644
--- a/llvm/test/CodeGen/X86/avx512-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-ext.ll
@@ -1745,7 +1745,8 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
 ; AVX512DQNOBW: # %bb.0:
 ; AVX512DQNOBW-NEXT: vpmovsxwd %xmm0, %ymm0
 ; AVX512DQNOBW-NEXT: vpslld $31, %ymm0, %ymm0
-; AVX512DQNOBW-NEXT: vmovmskps %ymm0, %eax
+; AVX512DQNOBW-NEXT: vpmovd2m %ymm0, %k0
+; AVX512DQNOBW-NEXT: kmovw %k0, %eax
 ; AVX512DQNOBW-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512DQNOBW-NEXT: vzeroupper
 ; AVX512DQNOBW-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index f8b0c3465f3db..e183da1386d5b 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -1669,7 +1669,8 @@ define i32 @test_insertelement_variable_v32i1(<32 x i8> %a, i8 %b, i32 %index) n
 ; SKX-NEXT: vpmovm2b %k0, %ymm0
 ; SKX-NEXT: vpbroadcastb %eax, %ymm0 {%k1}
 ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
-; SKX-NEXT: vpmovmskb %ymm0, %eax
+; SKX-NEXT: vpmovb2m %ymm0, %k0
+; SKX-NEXT: kmovd %k0, %eax
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
   %t1 = icmp ugt <32 x i8> %a, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
index 6f3be88d7cd0c..f31dafcd68626 100644
--- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
@@ -2751,7 +2751,8 @@ declare i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32>)
 define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_128:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmovmskps %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x50,0xc0]
+; CHECK-NEXT: vpmovd2m %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
   %res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
@@ -2776,7 +2777,8 @@ declare i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64>)
 define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_128:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmovmskpd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x50,0xc0]
+; CHECK-NEXT: vpmovq2m %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
   %res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
@@ -2788,7 +2790,8 @@ declare i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64>)
 define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_256:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmovmskpd %ymm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x50,0xc0]
+; CHECK-NEXT: vpmovq2m %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
+; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
diff --git a/llvm/test/CodeGen/X86/masked_compressstore.ll b/llvm/test/CodeGen/X86/masked_compressstore.ll
index 5296c9d0f0777..3187bf6448690 100644
--- a/llvm/test/CodeGen/X86/masked_compressstore.ll
+++ b/llvm/test/CodeGen/X86/masked_compressstore.ll
@@ -3444,7 +3444,8 @@ define void @compressstore_v8i16_v8i16(ptr %base, <8 x i16> %V, <8 x i16> %trigg
 ; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
 ; AVX512VLDQ-NEXT: vpmovsxwd %xmm1, %ymm1
-; AVX512VLDQ-NEXT: vmovmskps %ymm1, %eax
+; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k0
+; AVX512VLDQ-NEXT: kmovw %k0, %eax
 ; AVX512VLDQ-NEXT: testb $1, %al
 ; AVX512VLDQ-NEXT: jne LBB11_1
 ; AVX512VLDQ-NEXT: ## %bb.2: ## %else
diff --git a/llvm/test/CodeGen/X86/masked_expandload.ll b/llvm/test/CodeGen/X86/masked_expandload.ll
index ce8a34db498df..e81a983c07018 100644
--- a/llvm/test/CodeGen/X86/masked_expandload.ll
+++ b/llvm/test/CodeGen/X86/masked_expandload.ll
@@ -3047,7 +3047,8 @@ define <8 x i16> @expandload_v8i16_v8i16(ptr %base, <8 x i16> %src0, <8 x i16> %
 ; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
 ; AVX512VLDQ-NEXT: vpmovsxwd %xmm1, %ymm1
-; AVX512VLDQ-NEXT: vmovmskps %ymm1, %eax
+; AVX512VLDQ-NEXT: vpmovd2m %ymm1, %k0
+; AVX512VLDQ-NEXT: kmovw %k0, %eax
 ; AVX512VLDQ-NEXT: testb $1, %al
 ; AVX512VLDQ-NEXT: jne LBB11_1
 ; AVX512VLDQ-NEXT: ## %bb.2: ## %else
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index cf49ac1e4886b..58adbb767ed87 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -966,9 +966,10 @@ define <2 x double> @test17(ptr %base, <2 x i32> %ind, <2 x i1> %mask, <2 x doub
 ; X86-SKX-LABEL: test17:
 ; X86-SKX: # %bb.0:
 ; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
 ; X86-SKX-NEXT: vpslld $3, %xmm0, %xmm0
 ; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
-; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
+; X86-SKX-NEXT: kmovw %k0, %eax
 ; X86-SKX-NEXT: testb $1, %al
 ; X86-SKX-NEXT: jne .LBB16_1
 ; X86-SKX-NEXT: # %bb.2: # %else
@@ -1254,7 +1255,8 @@ define void @test20(<2 x float>%a1, <2 x ptr> %ptr, <2 x i1> %mask) {
 ; X64-SKX-LABEL: test20:
 ; X64-SKX: # %bb.0:
 ; X64-SKX-NEXT: vpsllq $63, %xmm2, %xmm2
-; X64-SKX-NEXT: vmovmskpd %xmm2, %eax
+; X64-SKX-NEXT: vpmovq2m %xmm2, %k0
+; X64-SKX-NEXT: kmovw %k0, %eax
 ; X64-SKX-NEXT: testb $1, %al
 ; X64-SKX-NEXT: jne .LBB19_1
 ; X64-SKX-NEXT: # %bb.2: # %else
@@ -1275,7 +1277,8 @@ define void @test20(<2 x float>%a1, <2 x ptr> %ptr, <2 x i1> %mask) {
 ; X86-SKX-LABEL: test20:
 ; X86-SKX: # %bb.0:
 ; X86-SKX-NEXT: vpsllq $63, %xmm2, %xmm2
-; X86-SKX-NEXT: vmovmskpd %xmm2, %eax
+; X86-SKX-NEXT: vpmovq2m %xmm2, %k0
+; X86-SKX-NEXT: kmovw %k0, %eax
 ; X86-SKX-NEXT: testb $1, %al
 ; X86-SKX-NEXT: jne .LBB19_1
 ; X86-SKX-NEXT: # %bb.2: # %else
@@ -1349,7 +1352,8 @@ define void @test21(<2 x i32>%a1, <2 x ptr> %ptr, <2 x i1>%mask) {
 ; X64-SKX-LABEL: test21:
 ; X64-SKX: # %bb.0:
 ; X64-SKX-NEXT: vpsllq $63, %xmm2, %xmm2
-; X64-SKX-NEXT: vmovmskpd %xmm2, %eax
+; X64-SKX-NEXT: vpmovq2m %xmm2, %k0
+; X64-SKX-NEXT: kmovw %k0, %eax
 ; X64-SKX-NEXT: testb $1, %al
 ; X64-SKX-NEXT: jne .LBB20_1
 ; X64-SKX-NEXT: # %bb.2: # %else
@@ -1370,7 +1374,8 @@ define void @test21(<2 x i32>%a1, <2 x ptr> %ptr, <2 x i1>%mask) {
 ; X86-SKX-LABEL: test21:
 ; X86-SKX: # %bb.0:
 ; X86-SKX-NEXT: vpsllq $63, %xmm2, %xmm2
-; X86-SKX-NEXT: vmovmskpd %xmm2, %eax
+; X86-SKX-NEXT: vpmovq2m %xmm2, %k0
+; X86-SKX-NEXT: kmovw %k0, %eax
 ; X86-SKX-NEXT: testb $1, %al
 ; X86-SKX-NEXT: jne .LBB20_1
 ; X86-SKX-NEXT: # %bb.2: # %else
@@ -1489,9 +1494,10 @@ define <2 x float> @test22(ptr %base, <2 x i32> %ind, <2 x i1> %mask, <2 x float
 ; X86-SKX-LABEL: test22:
 ; X86-SKX: # %bb.0:
 ; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
 ; X86-SKX-NEXT: vpslld $2, %xmm0, %xmm0
 ; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
-; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
+; X86-SKX-NEXT: kmovw %k0, %eax
 ; X86-SKX-NEXT: testb $1, %al
 ; X86-SKX-NEXT: jne .LBB21_1
 ; X86-SKX-NEXT: # %bb.2: # %else
@@ -1611,10 +1617,11 @@ define <2 x float> @test22a(ptr %base, <2 x i64> %ind, <2 x i1> %mask, <2 x floa
 ; X86-SKX-LABEL: test22a:
 ; X86-SKX: # %bb.0:
 ; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
 ; X86-SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X86-SKX-NEXT: vpslld $2, %xmm0, %xmm0
 ; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
-; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
+; X86-SKX-NEXT: kmovw %k0, %eax
 ; X86-SKX-NEXT: testb $1, %al
 ; X86-SKX-NEXT: jne .LBB22_1
 ; X86-SKX-NEXT: # %bb.2: # %else
@@ -1734,9 +1741,10 @@ define <2 x i32> @test23(ptr %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %s
 ; X86-SKX-LABEL: test23:
 ; X86-SKX: # %bb.0:
 ; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
 ; X86-SKX-NEXT: vpslld $2, %xmm0, %xmm0
 ; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
-; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
+; X86-SKX-NEXT: kmovw %k0, %eax
 ; X86-SKX-NEXT: testb $1, %al
 ; X86-SKX-NEXT: jne .LBB23_1
 ; X86-SKX-NEXT: # %bb.2: # %else
@@ -1852,10 +1860,11 @@ define <2 x i32> @test23b(ptr %base, <2 x i64> %ind, <2 x i1> %mask, <2 x i32> %
 ; X86-SKX-LABEL: test23b:
 ; X86-SKX: # %bb.0:
 ; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
 ; X86-SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
 ; X86-SKX-NEXT: vpslld $2, %xmm0, %xmm0
 ; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
-; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
+; X86-SKX-NEXT: kmovw %k0, %eax
 ; X86-SKX-NEXT: testb $1, %al
 ; X86-SKX-NEXT: jne .LBB24_1
 ; X86-SKX-NEXT: # %bb.2: # %else
@@ -2025,9 +2034,10 @@ define <2 x i64> @test25(ptr %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %s
 ; X86-SKX-LABEL: test25:
 ; X86-SKX: # %bb.0:
 ; X86-SKX-NEXT: vpsllq $63, %xmm1, %xmm1
+; X86-SKX-NEXT: vpmovq2m %xmm1, %k0
 ; X86-SKX-NEXT: vpslld $3, %xmm0, %xmm0
 ; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm0, %xmm0
-; X86-SKX-NEXT: vmovmskpd %xmm1, %eax
+; X86-SKX-NEXT: kmovw %k0, %eax
 ; X86-SKX-NEXT: testb $1, %al
 ; X86-SKX-NEXT: jne .LBB26_1
 ; X86-SKX-NEXT: # %bb.2: # %else
@@ -3752,9 +3762,10 @@ define void @test_scatter_2i32_index(<2 x double> %a1, ptr %base, <2 x i32> %ind
 ; X86-SKX-LABEL: test_scatter_2i32_index:
 ; X86-SKX: # %bb.0:
 ; X86-SKX-NEXT: vpsllq $63, %xmm2, %xmm2
+; X86-SKX-NEXT: vpmovq2m %xmm2, %k0
 ; X86-SKX-NEXT: vpslld $3, %xmm1, %xmm1
 ; X86-SKX-NEXT: vpaddd {{[0-9]+}}(%esp){1to4}, %xmm1, %xmm1
-; X86-SKX-NEXT: vmovmskpd %xmm2, %eax
+; X86-SKX-NEXT: kmovw %k0, %eax
 ; X86-SKX-NEXT: testb $1, %al
 ; X86-SKX-NEXT: jne .LBB52_1
 ; X86-SKX-NEXT: # %bb.2: # %else
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll b/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
index 5b5280601ea71..aad1b44344850 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
@@ -164,7 +164,8 @@ define <2 x i32> @test_gather_v2i32_data(<2 x ptr> %ptr, <2 x i1> %mask, <2 x i3
 ; WIDEN_SKX-LABEL: test_gather_v2i32_data:
 ; WIDEN_SKX: # %bb.0:
 ; WIDEN_SKX-NEXT: vpsllq $63, %xmm1, %xmm1
-; WIDEN_SKX-NEXT: vmovmskpd %xmm1, %eax
+; WIDEN_SKX-NEXT: vpmovq2m %xmm1, %k0
+; WIDEN_SKX-NEXT: kmovw %k0, %eax
 ; WIDEN_SKX-NEXT: testb $1, %al
 ; WIDEN_SKX-NEXT: jne .LBB2_1
 ; WIDEN_SKX-NEXT: # %bb.2: # %else
@@ -225,7 +226,8 @@ define void @test_scatter_v2i32_data(<2 x i32>%a1, <2 x ptr> %ptr, <2 x i1>%mask
 ; WIDEN_SKX-LABEL: test_scatter_v2i32_data:
 ; WIDEN_SKX: # %bb.0:
 ; WIDEN_SKX-NEXT: vpsllq $63, %xmm2, %xmm2
-; WIDEN_SKX-NEXT: vmovmskpd %xmm2, %eax
+; WIDEN_SKX-NEXT: vpmovq2m %xmm2, %k0
+; WIDEN_SKX-NEXT: kmovw %k0, %eax
 ; WIDEN_SKX-NEXT: testb $1, %al
 ; WIDEN_SKX-NEXT: jne .LBB3_1
 ; WIDEN_SKX-NEXT: # %bb.2: # %else
diff --git a/llvm/test/CodeGen/X86/masked_load.ll b/llvm/test/CodeGen/X86/masked_load.ll
index fa8f34cea4638..8c4bab99a5b7b 100644
--- a/llvm/test/CodeGen/X86/masked_load.ll
+++ b/llvm/test/CodeGen/X86/masked_load.ll
@@ -3008,7 +3008,8 @@ define <8 x i16> @load_v8i16_v8i16(<8 x i16> %trigger, ptr %addr, <8 x i16> %dst
 ; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLDQ-NEXT: vpcmpgtw %xmm0, %xmm2, %xmm0
 ; AVX512VLDQ-NEXT: vpmovsxwd %xmm0, %ymm0
-; AVX512VLDQ-NEXT: vmovmskps %ymm0, %eax
+; AVX512VLDQ-NEXT: vpmovd2m %ymm0, %k0
+; AVX512VLDQ-NEXT: kmovw %k0, %eax
 ; AVX512VLDQ-NEXT: testb $1, %al
 ; AVX512VLDQ-NEXT: jne LBB21_1
 ; AVX512VLDQ-NEXT: ## %bb.2: ## %else
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll
index fbecfcb45f8e7..c7320275091c6 100644
--- a/llvm/test/CodeGen/X86/masked_store.ll
+++ b/llvm/test/CodeGen/X86/masked_store.ll
@@ -1829,7 +1829,8 @@ define void @store_v8i16_v8i16(<8 x i16> %trigger, ptr %addr, <8 x i16> %val) no
 ; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VLDQ-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
 ; AVX512VLDQ-NEXT: vpmovsxwd %xmm0, %ymm0
-; AVX512VLDQ-NEXT: vmovmskps %ymm0, %eax
+; AVX512VLDQ-NEXT: vpmovd2m %ymm0, %k0
+; AVX512VLDQ-NEXT: kmovw %k0, %eax
 ; AVX512VLDQ-NEXT: testb $1, %al
 ; AVX512VLDQ-NEXT: jne LBB13_1
 ; AVX512VLDQ-NEXT: ## %bb.2: ## %else
diff --git a/llvm/test/CodeGen/X86/pr77459.ll b/llvm/test/CodeGen/X86/pr77459.ll
index b03907d6c871f..9c072e6f5e3fc 100644
--- a/llvm/test/CodeGen/X86/pr77459.ll
+++ b/llvm/test/CodeGen/X86/pr77459.ll
@@ -100,7 +100,8 @@ define i8 @reverse_cmp_v8i1(<8 x i16> %a0, <8 x i16> %a1) {
 ; AVX512-NEXT: vpmovm2d %k0, %ymm0
 ; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
 ; AVX512-NEXT: vpermd %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: vmovmskps %ymm0, %eax
+; AVX512-NEXT: vpmovd2m %ymm0, %k0
+; AVX512-NEXT: kmovd %k0, %eax
 ; AVX512-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -226,7 +227,8 @@ define i32 @reverse_cmp_v32i1(<32 x i8> %a0, <32 x i8> %a1) {
 ; AVX512-V4-NEXT: vpmovm2b %k0, %ymm0
 ; AVX512-V4-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16]
 ; AVX512-V4-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX512-V4-NEXT: vpmovmskb %ymm0, %eax
+; AVX512-V4-NEXT: vpmovb2m %ymm0, %k0
+; AVX512-V4-NEXT: kmovd %k0, %eax
 ; AVX512-V4-NEXT: vzeroupper
 ; AVX512-V4-NEXT: retq
 ;
@@ -236,7 +238,8 @@ define i32 @reverse_cmp_v32i1(<32 x i8> %a0, <32 x i8> %a1) {
 ; AVX512-VBMI-NEXT: vpmovm2b %k0, %ymm0
 ; AVX512-VBMI-NEXT: vmovdqa {{.*#+}} ymm1 = [31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
 ; AVX512-VBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
-; AVX512-VBMI-NEXT: vpmovmskb %ymm0, %eax
+; AVX512-VBMI-NEXT: vpmovb2m %ymm0, %k0
+; AVX512-VBMI-NEXT: kmovd %k0, %eax
 ; AVX512-VBMI-NEXT: vzeroupper
 ; AVX512-VBMI-NEXT: retq
   %cmp = icmp eq <32 x i8> %a0, %a1
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index 116dcdc8c5907..f434fc8c6cad8 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -1240,7 +1240,8 @@ define i8 @icmp0_v8i1(<8 x i8>) nounwind {
 ; AVX512VL-LABEL: icmp0_v8i1:
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX512VL-NEXT: vpmovmskb %xmm0, %eax
+; AVX512VL-NEXT: vpmovb2m %xmm0, %k0
+; AVX512VL-NEXT: kmovd %k0, %eax
 ; AVX512VL-NEXT: testb %al, %al
 ; AVX512VL-NEXT: sete %al
 ; AVX512VL-NEXT: retq
@@ -1906,7 +1907,8 @@ define i8 @icmp1_v8i1(<8 x i8>) nounwind {
 ; AVX512VL-LABEL: icmp1_v8i1:
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vpsllw $7, %xmm0, %xmm0
-; AVX512VL-NEXT: vpmovmskb %xmm0, %eax
+; AVX512VL-NEXT: vpmovb2m %xmm0, %k0
+; AVX512VL-NEXT: kmovd %k0, %eax
 ; AVX512VL-NEXT: cmpb $-1, %al
 ; AVX512VL-NEXT: sete %al
 ; AVX512VL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
index 9645f7c524cb4..2b89590a0bb41 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll
@@ -573,7 +573,8 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %a) {
 ; VL_BW_DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; VL_BW_DQ-NEXT: vpmovsxbd {{.*#+}} ymm2 = [8,2,10,3,3,2,2,3]
 ; VL_BW_DQ-NEXT: vpermi2d %ymm1, %ymm0, %ymm2
-; VL_BW_DQ-NEXT: vmovmskps %ymm2, %eax
+; VL_BW_DQ-NEXT: vpmovd2m %ymm2, %k0
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
 ; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
 ; VL_BW_DQ-NEXT: vzeroupper
 ; VL_BW_DQ-NEXT: retq
@@ -614,7 +615,8 @@ define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a) {
 ; VL_BW_DQ-NEXT: kmovd %edi, %k0
 ; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
 ; VL_BW_DQ-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
-; VL_BW_DQ-NEXT: vmovmskps %ymm0, %eax
+; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
 ; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
 ; VL_BW_DQ-NEXT: vzeroupper
 ; VL_BW_DQ-NEXT: retq
@@ -659,7 +661,8 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a) {
 ; VL_BW_DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; VL_BW_DQ-NEXT: vpmovsxbd {{.*#+}} ymm2 = [8,6,1,0,3,7,7,0]
 ; VL_BW_DQ-NEXT: vpermi2d %ymm1, %ymm0, %ymm2
-; VL_BW_DQ-NEXT: vmovmskps %ymm2, %eax
+; VL_BW_DQ-NEXT: vpmovd2m %ymm2, %k0
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
 ; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
 ; VL_BW_DQ-NEXT: vzeroupper
 ; VL_BW_DQ-NEXT: retq
@@ -700,7 +703,8 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %a) {
 ; VL_BW_DQ-NEXT: kmovd %edi, %k0
 ; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
 ; VL_BW_DQ-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[8,9,10,11],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; VL_BW_DQ-NEXT: vmovmskps %ymm0, %eax
+; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
 ; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
 ; VL_BW_DQ-NEXT: vzeroupper
 ; VL_BW_DQ-NEXT: retq
@@ -742,7 +746,8 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
 ; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
 ; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
 ; VL_BW_DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1,2,3,4,5,6,7]
-; VL_BW_DQ-NEXT: vmovmskps %ymm0, %eax
+; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
 ; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
 ; VL_BW_DQ-NEXT: vzeroupper
 ; VL_BW_DQ-NEXT: retq
@@ -791,7 +796,8 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
 ; VL_BW_DQ-NEXT: vpmovsxbd {{.*#+}} ymm1 = [9,1,2,3,4,5,6,7]
 ; VL_BW_DQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
 ; VL_BW_DQ-NEXT: vpermt2d %ymm0, %ymm1, %ymm2
-; VL_BW_DQ-NEXT: vmovmskps %ymm2, %eax
+; VL_BW_DQ-NEXT: vpmovd2m %ymm2, %k0
+; VL_BW_DQ-NEXT: kmovd %k0, %eax
 ; VL_BW_DQ-NEXT: # kill: def $al killed $al killed $eax
 ; VL_BW_DQ-NEXT: vzeroupper
 ; VL_BW_DQ-NEXT: retq

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
