Author: Thurston Dang Date: 2025-08-12T21:31:33-07:00 New Revision: d7b5ea35082fdf50f3f07d7fc59f0e50f9af6dba
URL: https://github.com/llvm/llvm-project/commit/d7b5ea35082fdf50f3f07d7fc59f0e50f9af6dba DIFF: https://github.com/llvm/llvm-project/commit/d7b5ea35082fdf50f3f07d7fc59f0e50f9af6dba.diff LOG: Revert "[msan] Improve packed multiply-add instrumentation (#152941)" This reverts commit ba603b5e4d44f1a25207a2a00196471d2ba93424. Added: Modified: llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll Removed: ################################################################################ diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 4cef57d43f203..21bd4164385ab 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3641,10 +3641,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOriginForNaryOp(I); } - // Get an MMX-sized (64-bit) vector type, or optionally, other sized - // vectors. - Type *getMMXVectorTy(unsigned EltSizeInBits, - unsigned X86_MMXSizeInBits = 64) { + // Get an MMX-sized vector type. + Type *getMMXVectorTy(unsigned EltSizeInBits) { + const unsigned X86_MMXSizeInBits = 64; assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 && "Illegal MMX vector element size"); return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits), @@ -3844,78 +3843,20 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOriginForNaryOp(I); } - // Instrument multiply-add intrinsics. - // - // e.g., Two operands: - // <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b) - // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %a, <1 x i64> %b) - // - // Three operands are not implemented yet: - // <4 x i32> @llvm.x86.avx512.vpdpbusd.128 - // (<4 x i32> %s, <4 x i32> %a, <4 x i32> %b) - // (the result of multiply-add'ing %a and %b is accumulated with %s) - void handleVectorPmaddIntrinsic(IntrinsicInst &I, unsigned ReductionFactor, - unsigned EltSizeInBits = 0) { + // Instrument multiply-add intrinsic. + void handleVectorPmaddIntrinsic(IntrinsicInst &I, + unsigned MMXEltSizeInBits = 0) { + Type *ResTy = + MMXEltSizeInBits ? getMMXVectorTy(MMXEltSizeInBits * 2) : I.getType(); IRBuilder<> IRB(&I); - - [[maybe_unused]] FixedVectorType *ReturnType = - cast<FixedVectorType>(I.getType()); - assert(isa<FixedVectorType>(ReturnType)); - - assert(I.arg_size() == 2); - - // Vectors A and B, and shadows - Value *Va = I.getOperand(0); - Value *Vb = I.getOperand(1); - - Value *Sa = getShadow(&I, 0); - Value *Sb = getShadow(&I, 1); - - FixedVectorType *ParamType = - cast<FixedVectorType>(I.getArgOperand(0)->getType()); - assert(ParamType == I.getArgOperand(1)->getType()); - - assert(ParamType->getPrimitiveSizeInBits() == - ReturnType->getPrimitiveSizeInBits()); - - // Step 1: instrument multiplication of corresponding vector elements - if (EltSizeInBits) { - ParamType = cast<FixedVectorType>( - getMMXVectorTy(EltSizeInBits, ParamType->getPrimitiveSizeInBits())); - - Va = IRB.CreateBitCast(Va, ParamType); - Vb = IRB.CreateBitCast(Vb, ParamType); - - Sa = IRB.CreateBitCast(Sa, getShadowTy(ParamType)); - Sb = IRB.CreateBitCast(Sb, getShadowTy(ParamType)); - } else { - assert(ParamType->getNumElements() == - ReturnType->getNumElements() * ReductionFactor); - } - - Value *Sab = IRB.CreateOr(Sa, Sb); - - // Multiplying an uninitialized / element by zero results in an initialized - // element. - Value *Zero = Constant::getNullValue(Va->getType()); - Value *VaNotZero = IRB.CreateICmpNE(Va, Zero); - Value *VbNotZero = IRB.CreateICmpNE(Vb, Zero); - Value *VaAndVbNotZero = IRB.CreateAnd(VaNotZero, VbNotZero); - - // After multiplying e.g., <8 x i16> %a, <8 x i16> %b, we should have - // <8 x i32> %ab, but we cheated and ended up with <8 x i16>. - Sab = IRB.CreateAnd(Sab, IRB.CreateSExt(VaAndVbNotZero, Sab->getType())); - - // Step 2: instrument horizontal add - // e.g., collapse <8 x i16> into <4 x i16> (reduction factor == 2) - // <16 x i8> into <4 x i8> (reduction factor == 4) - Value *OutShadow = horizontalReduce(I, ReductionFactor, Sab, nullptr); - - // Extend to <4 x i32>. - // For MMX, cast it back to <1 x i64>. - OutShadow = CreateShadowCast(IRB, OutShadow, getShadowTy(&I)); - - setShadow(&I, OutShadow); + auto *Shadow0 = getShadow(&I, 0); + auto *Shadow1 = getShadow(&I, 1); + Value *S = IRB.CreateOr(Shadow0, Shadow1); + S = IRB.CreateBitCast(S, ResTy); + S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)), + ResTy); + S = IRB.CreateBitCast(S, getShadowTy(&I)); + setShadow(&I, S); setOriginForNaryOp(I); } @@ -5450,28 +5391,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { handleVectorSadIntrinsic(I); break; - // Multiply and Add Packed Words - // < 4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) - // < 8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) - - // Multiply and Add Packed Signed and Unsigned Bytes - // < 8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) - // <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) case Intrinsic::x86_sse2_pmadd_wd: case Intrinsic::x86_avx2_pmadd_wd: case Intrinsic::x86_ssse3_pmadd_ub_sw_128: case Intrinsic::x86_avx2_pmadd_ub_sw: - handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2); + handleVectorPmaddIntrinsic(I); break; - // <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>) case Intrinsic::x86_ssse3_pmadd_ub_sw: - handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/8); + handleVectorPmaddIntrinsic(I, 8); break; - // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>) case Intrinsic::x86_mmx_pmadd_wd: - handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/16); + handleVectorPmaddIntrinsic(I, 16); break; case Intrinsic::x86_sse_cmp_ss: diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll index ab05e10ccd3c9..f916130fe53e5 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll @@ -141,16 +141,10 @@ define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i16> [[A0:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[A1:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i1> [[TMP4]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i16> [[TMP3]], [[TMP12]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[TMP7]], <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP7]], <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> -; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i16> [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP6:%.*]] = zext <8 x i16> [[TMP10]] to <8 x i32> -; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0]], <16 x i16> [[A1]]) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; @@ -684,16 +678,10 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i8> [[A0:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = and <32 x i1> [[TMP4]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i8> -; CHECK-NEXT: [[TMP7:%.*]] = and <32 x i8> [[TMP3]], [[TMP12]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <32 x i8> [[TMP7]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <32 x i8> [[TMP7]], <32 x i8> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> -; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[TMP10]] to <16 x i16> -; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]]) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] ; @@ -719,16 +707,10 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) # ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32 ; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i8> [[A0]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = and <32 x i1> [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8> -; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i8> [[TMP8]], [[TMP17]] -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> -; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i8> [[TMP13]], [[TMP14]] -; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i8> [[TMP15]] to <16 x i16> -; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]]) ; CHECK-NEXT: store <16 x i16> [[TMP11]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll index a0341c67b1365..ac3bb56719038 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll @@ -1687,27 +1687,16 @@ define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64> ; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[MMX_VAR_I]] to <4 x i16> -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[MMX_VAR1_I]] to <4 x i16> -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16> -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> -; CHECK-NEXT: [[TMP22:%.*]] = or <4 x i16> [[TMP10]], [[TMP11]] -; CHECK-NEXT: [[TMP29:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP30:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP31:%.*]] = and <4 x i1> [[TMP29]], [[TMP30]] -; CHECK-NEXT: [[TMP32:%.*]] = sext <4 x i1> [[TMP31]] to <4 x i16> -; CHECK-NEXT: [[TMP23:%.*]] = and <4 x i16> [[TMP22]], [[TMP32]] -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i16> [[TMP23]], <4 x i16> poison, <2 x i32> <i32 0, i32 2> -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[TMP23]], <4 x i16> poison, <2 x i32> <i32 1, i32 3> -; CHECK-NEXT: [[TMP26:%.*]] = or <2 x i16> [[TMP24]], [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = bitcast <2 x i16> [[TMP26]] to i32 -; CHECK-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP28]] to <1 x i64> -; CHECK-NEXT: [[TMP33:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP8:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP14]] to <2 x i32> -; CHECK-NEXT: [[TMP34:%.*]] = bitcast <1 x i64> [[TMP33]] to <2 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP34]] to <1 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 ; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 @@ -3326,27 +3315,16 @@ define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to <1 x i64> ; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP22]] to <8 x i8> -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP23]] to <8 x i8> -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP21]] to <8 x i8> -; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> -; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i8> [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <8 x i8> [[TMP10]], zeroinitializer -; CHECK-NEXT: [[TMP33:%.*]] = icmp ne <8 x i8> [[TMP11]], zeroinitializer -; CHECK-NEXT: [[TMP34:%.*]] = and <8 x i1> [[TMP32]], [[TMP33]] -; CHECK-NEXT: [[TMP35:%.*]] = sext <8 x i1> [[TMP34]] to <8 x i8> -; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i8> [[TMP14]], [[TMP35]] -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> -; CHECK-NEXT: [[TMP27:%.*]] = or <4 x i8> [[TMP25]], [[TMP26]] -; CHECK-NEXT: [[TMP29:%.*]] = bitcast <4 x i8> [[TMP27]] to i32 -; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP29]] to i64 -; CHECK-NEXT: [[TMP30:%.*]] = bitcast i64 [[TMP24]] to <1 x i64> -; CHECK-NEXT: [[TMP36:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] -; CHECK-NEXT: [[TMP31:%.*]] = bitcast <1 x i64> [[TMP30]] to <8 x i8> -; CHECK-NEXT: [[TMP28:%.*]] = bitcast <1 x i64> [[TMP36]] to <8 x i8> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP31]] to <1 x i64> -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP28]] to <1 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = or <1 x i64> [[TMP21]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP14]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <1 x i64> [[TMP19]], i32 0 ; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll index fe1245553c116..8f915a59db8e5 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll @@ -763,16 +763,10 @@ define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i16> [[A0:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[A1:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = and <8 x i1> [[TMP4]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = sext <8 x i1> [[TMP11]] to <8 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i16> [[TMP3]], [[TMP12]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> -; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i16> [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP10]] to <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0]], <8 x i16> [[A1]]) +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll index bf87027b056fa..5cc56baf0e0de 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll @@ -149,17 +149,11 @@ define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[A0:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i16> [[A1:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i1> [[TMP5]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = sext <16 x i1> [[TMP13]] to <16 x i16> -; CHECK-NEXT: [[TMP8:%.*]] = and <16 x i16> [[TMP4]], [[TMP14]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP8]], <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP8]], <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> -; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i16> [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[TMP6:%.*]] = zext <8 x i16> [[TMP11]] to <8 x i32> -; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0]], <16 x i16> [[A1]]) +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; @@ -720,17 +714,11 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[A0:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i1> [[TMP5]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = sext <32 x i1> [[TMP13]] to <32 x i8> -; CHECK-NEXT: [[TMP8:%.*]] = and <32 x i8> [[TMP4]], [[TMP14]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <32 x i8> [[TMP8]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <32 x i8> [[TMP8]], <32 x i8> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> -; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i8> [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[TMP11]] to <16 x i16> -; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]]) +; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] ; @@ -746,7 +734,7 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) # ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -756,17 +744,11 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) # ; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], -2147483649 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32 -; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[A0]], zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i1> [[TMP10]], [[TMP17]] -; CHECK-NEXT: [[TMP18:%.*]] = sext <32 x i1> [[TMP12]] to <32 x i8> -; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i8> [[TMP9]], [[TMP18]] -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> -; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i8> [[TMP14]], [[TMP15]] -; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i8> [[TMP16]] to <16 x i16> -; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]]) +; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]]) ; CHECK-NEXT: store <16 x i16> [[TMP11]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll index e0e75a91cbd2e..0a3efaaea149f 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll @@ -1730,27 +1730,16 @@ define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64> ; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[MMX_VAR_I]] to <4 x i16> -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[MMX_VAR1_I]] to <4 x i16> -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16> -; CHECK-NEXT: [[TMP22:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> -; CHECK-NEXT: [[TMP23:%.*]] = or <4 x i16> [[TMP11]], [[TMP22]] -; CHECK-NEXT: [[TMP30:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP31:%.*]] = icmp ne <4 x i16> [[TMP10]], zeroinitializer -; CHECK-NEXT: [[TMP32:%.*]] = and <4 x i1> [[TMP30]], [[TMP31]] -; CHECK-NEXT: [[TMP33:%.*]] = sext <4 x i1> [[TMP32]] to <4 x i16> -; CHECK-NEXT: [[TMP24:%.*]] = and <4 x i16> [[TMP23]], [[TMP33]] -; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[TMP24]], <4 x i16> poison, <2 x i32> <i32 0, i32 2> -; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i16> [[TMP24]], <4 x i16> poison, <2 x i32> <i32 1, i32 3> -; CHECK-NEXT: [[TMP27:%.*]] = or <2 x i16> [[TMP25]], [[TMP26]] -; CHECK-NEXT: [[TMP28:%.*]] = bitcast <2 x i16> [[TMP27]] to i32 -; CHECK-NEXT: [[TMP29:%.*]] = zext i32 [[TMP28]] to i64 -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP29]] to <1 x i64> -; CHECK-NEXT: [[TMP34:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP8:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> +; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP14]] to <2 x i32> -; CHECK-NEXT: [[TMP35:%.*]] = bitcast <1 x i64> [[TMP34]] to <2 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP35]] to <1 x i64> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 ; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 @@ -3412,27 +3401,16 @@ define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to <1 x i64> ; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP22]] to <8 x i8> -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP23]] to <8 x i8> -; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP21]] to <8 x i8> -; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> -; CHECK-NEXT: [[TMP26:%.*]] = or <8 x i8> [[TMP13]], [[TMP14]] -; CHECK-NEXT: [[TMP33:%.*]] = icmp ne <8 x i8> [[TMP11]], zeroinitializer -; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <8 x i8> [[TMP12]], zeroinitializer -; CHECK-NEXT: [[TMP35:%.*]] = and <8 x i1> [[TMP33]], [[TMP34]] -; CHECK-NEXT: [[TMP36:%.*]] = sext <8 x i1> [[TMP35]] to <8 x i8> -; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i8> [[TMP26]], [[TMP36]] -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> -; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> -; CHECK-NEXT: [[TMP29:%.*]] = or <4 x i8> [[TMP27]], [[TMP28]] -; CHECK-NEXT: [[TMP24:%.*]] = bitcast <4 x i8> [[TMP29]] to i32 -; CHECK-NEXT: [[TMP30:%.*]] = zext i32 [[TMP24]] to i64 -; CHECK-NEXT: [[TMP32:%.*]] = bitcast i64 [[TMP30]] to <1 x i64> -; CHECK-NEXT: [[TMP31:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] -; CHECK-NEXT: [[TMP25:%.*]] = bitcast <1 x i64> [[TMP32]] to <8 x i8> -; CHECK-NEXT: [[TMP37:%.*]] = bitcast <1 x i64> [[TMP31]] to <8 x i8> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP25]] to <1 x i64> -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP37]] to <1 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = or <1 x i64> [[TMP21]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> +; CHECK-NEXT: [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP14]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <1 x i64> [[TMP19]], i32 0 ; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll index 5edc13b7abbc4..e771e60e2f294 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll @@ -800,17 +800,11 @@ define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[A0:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <8 x i16> [[A1:%.*]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i1> [[TMP5]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = sext <8 x i1> [[TMP13]] to <8 x i16> -; CHECK-NEXT: [[TMP8:%.*]] = and <8 x i16> [[TMP4]], [[TMP14]] -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> -; CHECK-NEXT: [[TMP11:%.*]] = or <4 x i16> [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP11]] to <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0]], <8 x i16> [[A1]]) +; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll b/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll index 21996b13a9961..d614bb85d8584 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll @@ -18,15 +18,9 @@ define <4 x i32> @Test_sse2_pmadd_wd(<8 x i16> %a, <8 x i16> %b) sanitize_memory ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i16> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i16> [[A]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i16> [[B]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = and <8 x i1> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = sext <8 x i1> [[TMP10]] to <8 x i16> -; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i16> [[TMP2]], [[TMP11]] -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[TMP6]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> [[TMP6]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> -; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i16> [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i16> [[TMP9]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[C:%.*]] = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A]], <8 x i16> [[B]]) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[C]] @@ -45,24 +39,13 @@ define <1 x i64> @Test_ssse3_pmadd_ub_sw(<1 x i64> %a, <1 x i64> %b) sanitize_me ; CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8> -; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i8> [[TMP4]], [[TMP5]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <8 x i8> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <8 x i8> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i1> [[TMP14]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = sext <8 x i1> [[TMP16]] to <8 x i8> -; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i8> [[TMP13]], [[TMP17]] -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> -; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i8> [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP18:%.*]] = bitcast <4 x i8> [[TMP10]] to i32 -; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -; CHECK-NEXT: [[TMP20:%.*]] = bitcast i64 [[TMP19]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = or <1 x i64> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i16> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> ; CHECK-NEXT: [[C:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[A]], <1 x i64> [[B]]) #[[ATTR2]] -; CHECK-NEXT: store <1 x i64> [[TMP20]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <1 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[C]] ; entry: _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits