Author: Simon Pilgrim Date: 2025-08-13T10:28:30+01:00 New Revision: 91fff707407ea4d3455d4675bc1310a7bc0b5d58
URL: https://github.com/llvm/llvm-project/commit/91fff707407ea4d3455d4675bc1310a7bc0b5d58 DIFF: https://github.com/llvm/llvm-project/commit/91fff707407ea4d3455d4675bc1310a7bc0b5d58.diff LOG: [clang][X86] Replace vprot/vprol/vpror/vshld/vshrd intrinsics with __builtin_elementwise_fshl/fshr (#153229) Replaces the XOP/AVX512 per-element rotation/funnel shift builtins with the generic __builtin_elementwise_fshl/fshr We still have uniform immediate variants to handle next. Part of #153152 Added: Modified: clang/include/clang/Basic/BuiltinsX86.td clang/lib/CodeGen/TargetBuiltins/X86.cpp clang/lib/Headers/avx512fintrin.h clang/lib/Headers/avx512vbmi2intrin.h clang/lib/Headers/avx512vlintrin.h clang/lib/Headers/avx512vlvbmi2intrin.h clang/lib/Headers/xopintrin.h Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index fc1ee3be7889f..4262bdaa7cdd9 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -945,10 +945,6 @@ let Features = "xop", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in def vphsubwd : X86Builtin<"_Vector<4, int>(_Vector<8, short>)">; def vphsubdq : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>)">; def vpperm : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">; - def vprotb : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; - def vprotw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; - def vprotd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; - def vprotq : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; def vprotbi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant char)">; def vprotwi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant char)">; def vprotdi : X86Builtin<"_Vector<4, 
int>(_Vector<4, int>, _Constant char)">; @@ -1882,78 +1878,6 @@ let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVect def vpshldw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">; } -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpshldvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpshldvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">; -} - -let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshldvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpshldvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpshldvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">; -} - -let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshldvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpshldvw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpshldvw256 : X86Builtin<"_Vector<16, 
short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>)">; -} - -let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshldvw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpshrdvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpshrdvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">; -} - -let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshrdvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpshrdvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpshrdvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">; -} - -let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshrdvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpshrdvw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpshrdvw256 
: X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>)">; -} - -let Features = "avx512vbmi2,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshrdvw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">; -} - let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def vpshrdd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">; } @@ -2165,28 +2089,10 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256 } let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def prolvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; - def prolvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">; def prord512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">; def prorq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def prolvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; -} - -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def prolvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; -} - -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def prolvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; -} - -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def prolvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">; -} - let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def prord128 : X86Builtin<"_Vector<4, 
int>(_Vector<4, int>, _Constant int)">; } @@ -2203,27 +2109,6 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256 def prorq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; } -let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def prorvd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>)">; - def prorvq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>)">; -} - -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def prorvd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; -} - -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def prorvd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; -} - -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def prorvq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">; -} - -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def prorvq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">; -} - let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">; def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">; diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index f8d451bd20fa3..b9248a7d43f85 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -1932,10 +1932,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return SI; } // Rotate is a special case of funnel shift - 1st 2 args are the same. 
- case X86::BI__builtin_ia32_vprotb: - case X86::BI__builtin_ia32_vprotw: - case X86::BI__builtin_ia32_vprotd: - case X86::BI__builtin_ia32_vprotq: case X86::BI__builtin_ia32_vprotbi: case X86::BI__builtin_ia32_vprotwi: case X86::BI__builtin_ia32_vprotdi: @@ -1946,12 +1942,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_prolq128: case X86::BI__builtin_ia32_prolq256: case X86::BI__builtin_ia32_prolq512: - case X86::BI__builtin_ia32_prolvd128: - case X86::BI__builtin_ia32_prolvd256: - case X86::BI__builtin_ia32_prolvd512: - case X86::BI__builtin_ia32_prolvq128: - case X86::BI__builtin_ia32_prolvq256: - case X86::BI__builtin_ia32_prolvq512: return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false); case X86::BI__builtin_ia32_prord128: case X86::BI__builtin_ia32_prord256: @@ -1959,12 +1949,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_prorq128: case X86::BI__builtin_ia32_prorq256: case X86::BI__builtin_ia32_prorq512: - case X86::BI__builtin_ia32_prorvd128: - case X86::BI__builtin_ia32_prorvd256: - case X86::BI__builtin_ia32_prorvd512: - case X86::BI__builtin_ia32_prorvq128: - case X86::BI__builtin_ia32_prorvq256: - case X86::BI__builtin_ia32_prorvq512: return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true); case X86::BI__builtin_ia32_selectb_128: case X86::BI__builtin_ia32_selectb_256: @@ -2357,29 +2341,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Ops 0 and 1 are swapped. 
return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true); - case X86::BI__builtin_ia32_vpshldvd128: - case X86::BI__builtin_ia32_vpshldvd256: - case X86::BI__builtin_ia32_vpshldvd512: - case X86::BI__builtin_ia32_vpshldvq128: - case X86::BI__builtin_ia32_vpshldvq256: - case X86::BI__builtin_ia32_vpshldvq512: - case X86::BI__builtin_ia32_vpshldvw128: - case X86::BI__builtin_ia32_vpshldvw256: - case X86::BI__builtin_ia32_vpshldvw512: - return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false); - - case X86::BI__builtin_ia32_vpshrdvd128: - case X86::BI__builtin_ia32_vpshrdvd256: - case X86::BI__builtin_ia32_vpshrdvd512: - case X86::BI__builtin_ia32_vpshrdvq128: - case X86::BI__builtin_ia32_vpshrdvq256: - case X86::BI__builtin_ia32_vpshrdvq512: - case X86::BI__builtin_ia32_vpshrdvw128: - case X86::BI__builtin_ia32_vpshrdvw256: - case X86::BI__builtin_ia32_vpshrdvw512: - // Ops 0 and 1 are swapped. - return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true); - // Reductions case X86::BI__builtin_ia32_reduce_fadd_pd512: case X86::BI__builtin_ia32_reduce_fadd_ps512: diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 9fc1df3acd3d0..90f883ba9f770 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -4926,7 +4926,7 @@ _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi32 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B); + return (__m512i)__builtin_elementwise_fshr((__v16su)__A,(__v16su)__A, (__v16su)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -4948,7 +4948,7 @@ _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi64 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B); + return (__m512i)__builtin_elementwise_fshr((__v8du)__A, 
(__v8du)__A, (__v8du)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -5038,7 +5038,7 @@ _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi32 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B); + return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__A, (__v16su)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -5060,7 +5060,7 @@ _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi64 (__m512i __A, __m512i __B) { - return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B); + return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__A, (__v8du)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 diff --git a/clang/lib/Headers/avx512vbmi2intrin.h b/clang/lib/Headers/avx512vbmi2intrin.h index 11598c888787c..f9a5f82b61f82 100644 --- a/clang/lib/Headers/avx512vbmi2intrin.h +++ b/clang/lib/Headers/avx512vbmi2intrin.h @@ -215,8 +215,8 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B, - (__v8di)__C); + return (__m512i)__builtin_elementwise_fshl((__v8du)__A, (__v8du)__B, + (__v8du)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -238,8 +238,8 @@ _mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B, - (__v16si)__C); + return (__m512i)__builtin_elementwise_fshl((__v16su)__A, (__v16su)__B, + (__v16su)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -261,8 +261,8 @@ _mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i 
__B, __m512i __C) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B, - (__v32hi)__C); + return (__m512i)__builtin_elementwise_fshl((__v32hu)__A, (__v32hu)__B, + (__v32hu)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -284,8 +284,9 @@ _mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B, - (__v8di)__C); + // Ops __A and __B are swapped. + return (__m512i)__builtin_elementwise_fshr((__v8du)__B, (__v8du)__A, + (__v8du)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -307,8 +308,9 @@ _mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B, - (__v16si)__C); + // Ops __A and __B are swapped. + return (__m512i)__builtin_elementwise_fshr((__v16su)__B, (__v16su)__A, + (__v16su)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -330,8 +332,9 @@ _mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) { - return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B, - (__v32hi)__C); + // Ops __A and __B are swapped. 
+ return (__m512i)__builtin_elementwise_fshr((__v32hu)__B, (__v32hu)__A, + (__v32hu)__C); } static __inline__ __m512i __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index cbad39acad84f..366adab1fab32 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -4310,7 +4310,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi32 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B); + return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -4332,7 +4332,7 @@ _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi32 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B); + return (__m256i)__builtin_elementwise_fshl((__v8su)__A, (__v8su)__A, (__v8su)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -4354,7 +4354,7 @@ _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -4376,7 +4376,7 @@ _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B); + return (__m256i)__builtin_elementwise_fshl((__v4du)__A, (__v4du)__A, (__v4du)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -4578,7 +4578,7 @@ _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 _mm_rorv_epi32 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B); + return (__m128i)__builtin_elementwise_fshr((__v4su)__A, (__v4su)__A, (__v4su)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -4600,7 +4600,7 @@ _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi32 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B); + return (__m256i)__builtin_elementwise_fshr((__v8su)__A, (__v8su)__A, (__v8su)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -4622,7 +4622,7 @@ _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi64 (__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_elementwise_fshr((__v2du)__A, (__v2du)__A, (__v2du)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -4644,7 +4644,7 @@ _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi64 (__m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B); + return (__m256i)__builtin_elementwise_fshr((__v4du)__A, (__v4du)__A, (__v4du)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 diff --git a/clang/lib/Headers/avx512vlvbmi2intrin.h b/clang/lib/Headers/avx512vlvbmi2intrin.h index 77af2d5cbd2a0..04db52c822640 100644 --- a/clang/lib/Headers/avx512vlvbmi2intrin.h +++ b/clang/lib/Headers/avx512vlvbmi2intrin.h @@ -415,8 +415,8 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B, - (__v4di)__C); + return (__m256i)__builtin_elementwise_fshl((__v4du)__A, (__v4du)__B, 
+ (__v4du)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -438,8 +438,8 @@ _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B, - (__v2di)__C); + return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__B, + (__v2du)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -461,8 +461,8 @@ _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B, - (__v8si)__C); + return (__m256i)__builtin_elementwise_fshl((__v8su)__A, (__v8su)__B, + (__v8su)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -484,8 +484,8 @@ _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B, - (__v4si)__C); + return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__B, + (__v4su)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -507,8 +507,8 @@ _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B, - (__v16hi)__C); + return (__m256i)__builtin_elementwise_fshl((__v16hu)__A, (__v16hu)__B, + (__v16hu)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -530,8 +530,8 @@ _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C) { - return 
(__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B, - (__v8hi)__C); + return (__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__B, + (__v8hu)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -553,8 +553,9 @@ _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B, - (__v4di)__C); + // Ops __A and __B are swapped. + return (__m256i)__builtin_elementwise_fshr((__v4du)__B, (__v4du)__A, + (__v4du)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -576,8 +577,9 @@ _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B, - (__v2di)__C); + // Ops __A and __B are swapped. + return (__m128i)__builtin_elementwise_fshr((__v2du)__B, (__v2du)__A, + (__v2du)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -599,8 +601,9 @@ _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B, - (__v8si)__C); + // Ops __A and __B are swapped. + return (__m256i)__builtin_elementwise_fshr((__v8su)__B, (__v8su)__A, + (__v8su)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -622,8 +625,9 @@ _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B, - (__v4si)__C); + // Ops __A and __B are swapped. 
+ return (__m128i)__builtin_elementwise_fshr((__v4su)__B, (__v4su)__A, + (__v4su)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -645,8 +649,9 @@ _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C) { - return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B, - (__v16hi)__C); + // Ops __A and __B are swapped. + return (__m256i)__builtin_elementwise_fshr((__v16hu)__B, (__v16hu)__A, + (__v16hu)__C); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -668,8 +673,9 @@ _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C) { - return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B, - (__v8hi)__C); + // Ops __A and __B are swapped. + return (__m128i)__builtin_elementwise_fshr((__v8hu)__B, (__v8hu)__A, + (__v8hu)__C); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 diff --git a/clang/lib/Headers/xopintrin.h b/clang/lib/Headers/xopintrin.h index fb88a9060574e..7015719659139 100644 --- a/clang/lib/Headers/xopintrin.h +++ b/clang/lib/Headers/xopintrin.h @@ -211,25 +211,25 @@ _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rot_epi8(__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B); + return (__m128i)__builtin_elementwise_fshl((__v16qu)__A, (__v16qu)__A, (__v16qu)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rot_epi16(__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B); + return (__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__A, (__v8hu)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rot_epi32(__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B); + return 
(__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rot_epi64(__m128i __A, __m128i __B) { - return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B); + return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B); } #define _mm_roti_epi8(A, N) \ _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
