Hi Craig,

Could this be done in the headers, like the 128-bit ones? That way we could get rid of the builtins, no?
Thanks, Filipe On Sun, Feb 15, 2015 at 4:42 PM, Craig Topper <[email protected]> wrote: > Author: ctopper > Date: Sun Feb 15 18:42:49 2015 > New Revision: 229348 > > URL: http://llvm.org/viewvc/llvm-project?rev=229348&view=rev > Log: > [X86] Teach clang to lower __builtin_ia32_psrldqi256 and > __builtin_ia32_pslldqi256 to vector shuffles the backend recognizes. This > is a step towards removing the corresponding intrinsics from the backend. > > Modified: > cfe/trunk/lib/CodeGen/CGBuiltin.cpp > cfe/trunk/test/CodeGen/avx2-builtins.c > > Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=229348&r1=229347&r2=229348&view=diff > > ============================================================================== > --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) > +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sun Feb 15 18:42:49 2015 > @@ -6025,6 +6025,60 @@ Value *CodeGenFunction::EmitX86BuiltinEx > // If palignr is shifting the pair of vectors more than 32 bytes, > emit zero. > return llvm::Constant::getNullValue(ConvertType(E->getType())); > } > + case X86::BI__builtin_ia32_pslldqi256: { > + // Shift value is in bits so divide by 8. > + unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() > >> 3; > + > + // If pslldq is shifting the vector more than 15 bytes, emit zero. > + if (shiftVal >= 16) > + return llvm::Constant::getNullValue(ConvertType(E->getType())); > + > + SmallVector<llvm::Constant*, 32> Indices; > + // 256-bit pslldq operates on 128-bit lanes so we need to handle that > + for (unsigned l = 0; l != 32; l += 16) { > + for (unsigned i = 0; i != 16; ++i) { > + unsigned Idx = 32 + i - shiftVal; > + if (Idx < 32) Idx -= 16; // end of lane, switch operand. 
> + Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l)); > + } > + } > + > + llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32); > + Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); > + Value *Zero = llvm::Constant::getNullValue(VecTy); > + > + Value *SV = llvm::ConstantVector::get(Indices); > + SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq"); > + llvm::Type *ResultType = ConvertType(E->getType()); > + return Builder.CreateBitCast(SV, ResultType, "cast"); > + } > + case X86::BI__builtin_ia32_psrldqi256: { > + // Shift value is in bits so divide by 8. > + unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() > >> 3; > + > + // If psrldq is shifting the vector more than 15 bytes, emit zero. > + if (shiftVal >= 16) > + return llvm::Constant::getNullValue(ConvertType(E->getType())); > + > + SmallVector<llvm::Constant*, 32> Indices; > + // 256-bit psrldq operates on 128-bit lanes so we need to handle that > + for (unsigned l = 0; l != 32; l += 16) { > + for (unsigned i = 0; i != 16; ++i) { > + unsigned Idx = i + shiftVal; > + if (Idx >= 16) Idx += 16; // end of lane, switch operand. 
> + Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l)); > + } > + } > + > + llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32); > + Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast"); > + Value *Zero = llvm::Constant::getNullValue(VecTy); > + > + Value *SV = llvm::ConstantVector::get(Indices); > + SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq"); > + llvm::Type *ResultType = ConvertType(E->getType()); > + return Builder.CreateBitCast(SV, ResultType, "cast"); > + } > case X86::BI__builtin_ia32_movntps: > case X86::BI__builtin_ia32_movntps256: > case X86::BI__builtin_ia32_movntpd: > > Modified: cfe/trunk/test/CodeGen/avx2-builtins.c > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=229348&r1=229347&r2=229348&view=diff > > ============================================================================== > --- cfe/trunk/test/CodeGen/avx2-builtins.c (original) > +++ cfe/trunk/test/CodeGen/avx2-builtins.c Sun Feb 15 18:42:49 2015 > @@ -462,7 +462,7 @@ __m256i test_mm256_sign_epi32(__m256i a, > } > > __m256i test_mm256_slli_si256(__m256i a) { > - // CHECK: @llvm.x86.avx2.psll.dq > + // CHECK: shufflevector <32 x i8> zeroinitializer, <32 x i8> %{{.*}}, > <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, > i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32 > 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, > i32 56, i32 57, i32 58, i32 59, i32 60> > return _mm256_slli_si256(a, 3); > } > > @@ -517,7 +517,7 @@ __m256i test_mm256_sra_epi32(__m256i a, > } > > __m256i test_mm256_srli_si256(__m256i a) { > - // CHECK: @llvm.x86.avx2.psrl.dq > + // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> zeroinitializer, > <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 > 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20, > i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 > 
30, i32 31, i32 48, i32 49, i32 50> > return _mm256_srli_si256(a, 3); > } > > > > _______________________________________________ > cfe-commits mailing list > [email protected] > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits >
_______________________________________________ cfe-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
