https://github.com/stomfaig updated https://github.com/llvm/llvm-project/pull/171227
>From 62647bf9b0323e8ca161dd87657e56e5d6aa20b1 Mon Sep 17 00:00:00 2001 From: stomfaig <[email protected]> Date: Sun, 7 Dec 2025 23:37:58 +0000 Subject: [PATCH 1/8] adding initial handlers --- clang/lib/CodeGen/TargetBuiltins/X86.cpp | 98 ++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index be2b7d442645e..a3e5c48629228 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -840,6 +840,104 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Ops[0]); return Builder.CreateExtractValue(Call, 0); } + case X86::BI__builtin_ia32_roundps: + case X86::BI__builtin_ia32_roundpd: + case X86::BI__builtin_ia32_roundps256: + case X86::BI__builtin_ia32_roundpd256: { + unsigned M = cast<ConstantInt>(Ops[1])->getZExtValue(); + unsigned roundingModeAndPE = M & 0b111; + unsigned updatePE = M & 0b100; + unsigned use_MXCSR = M & 0b1000; + + Intrinsic::ID ID; + + // Currently no ops for MXCSR bit set, so lower directly to SSE41 instruction + if (use_MXCSR) { + switch (BuiltinID) { + case X86::BI__builtin_ia32_roundps: ID = Intrinsic::x86_sse41_round_ps; break; + case X86::BI__builtin_ia32_roundpd: ID = Intrinsic::x86_sse41_round_pd; break; + } + return nullptr; + } else { + switch (roundingModeAndPE) { + default: return nullptr; + case 0b000: ID = Intrinsic::nearbyint; break; + case 0b001: ID = Intrinsic::floor; break; + case 0b010: ID = Intrinsic::ceil; break; + case 0b011: ID = Intrinsic::trunc; break; + case 0b100: ID = Intrinsic::experimental_constrained_floor; break; // TODO: replace with actual op + case 0b101: ID = Intrinsic::experimental_constrained_floor; break; + case 0b110: ID = Intrinsic::experimental_constrained_ceil; break; + case 0b111: ID = Intrinsic::experimental_constrained_trunc; break; + } + } + + Function *F = CGM.getIntrinsic(ID, Ops[0]->getType()); + + if (updatePE) { + LLVMContext &Ctx = 
CGM.getLLVMContext(); + + Value *ExceptMode =MetadataAsValue::get( + Ctx, + MDString::get(Ctx, "fpexcept.strict") + ); + + return Builder.CreateCall(F, {Ops[0], ExceptMode}); + } + + return Builder.CreateCall(F, {Ops[0]}); + } + case X86::BI__builtin_ia32_roundss: + case X86::BI__builtin_ia32_roundsd: { + unsigned M = cast<ConstantInt>(Ops[2])->getZExtValue(); + unsigned roundingModeAndPE = M & 0b111; + unsigned updatePE = M & 0b100; + unsigned use_MXCSR = M & 0b1000; + + Intrinsic::ID ID; + + // Currently no ops for MXCSR bit set, so lower directly to SSE41 instruction + if (use_MXCSR) { + switch (BuiltinID) { + case X86::BI__builtin_ia32_roundss: ID = Intrinsic::x86_sse41_round_ss; break; + case X86::BI__builtin_ia32_roundsd: ID = Intrinsic::x86_sse41_round_sd; break; + } + return nullptr; + } else { + switch (roundingModeAndPE) { + default: return nullptr; + case 0b000: ID = Intrinsic::nearbyint; break; + case 0b001: ID = Intrinsic::floor; break; + case 0b010: ID = Intrinsic::ceil; break; + case 0b011: ID = Intrinsic::trunc; break; + case 0b100: ID = Intrinsic::experimental_constrained_floor; break; // TODO: replace with actual op + case 0b101: ID = Intrinsic::experimental_constrained_floor; break; + case 0b110: ID = Intrinsic::experimental_constrained_ceil; break; + case 0b111: ID = Intrinsic::experimental_constrained_trunc; break; + } + } + + Value *idx = Builder.getInt32(0); + Value *b0 = Builder.CreateExtractElement(Ops[1], idx); + Value *rounded0; + + Function *F = CGM.getIntrinsic(ID, b0->getType()); + + if (updatePE) { + LLVMContext &Ctx = CGM.getLLVMContext(); + + Value *ExceptMode =MetadataAsValue::get( + Ctx, + MDString::get(Ctx, "fpexcept.strict") + ); + + rounded0 = Builder.CreateCall(F, {b0, ExceptMode}); + } else { + rounded0 = Builder.CreateCall(F, {b0}); + } + + return Builder.CreateInsertElement(Ops[0], rounded0, idx); + } case X86::BI__builtin_ia32_lzcnt_u16: case X86::BI__builtin_ia32_lzcnt_u32: case X86::BI__builtin_ia32_lzcnt_u64: { >From 
56f72b858744ff3c837170aac2f6b9654dd9be0e Mon Sep 17 00:00:00 2001 From: stomfaig <[email protected]> Date: Sun, 7 Dec 2025 23:38:35 +0000 Subject: [PATCH 2/8] modify relevant tests --- clang/test/CodeGen/X86/avx-builtins.c | 12 ++++----- clang/test/CodeGen/X86/pr51324.c | 2 +- clang/test/CodeGen/X86/sse41-builtins.c | 36 ++++++++++++++++--------- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 13da4292c5b92..506327bc910c7 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -246,13 +246,13 @@ TEST_CONSTEXPR(match_m128i(_mm256_castsi256_si128((__m256i)(__v4du){0xBFF0000000 __m256d test_mm256_ceil_pd(__m256d x) { // CHECK-LABEL: test_mm256_ceil_pd - // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %{{.*}}, i32 2) + // CHECK: %{{.*}} = call <4 x double> @llvm.ceil.v4f64(<4 x double> %{{.*}}) return _mm256_ceil_pd(x); } __m256 test_mm_ceil_ps(__m256 x) { // CHECK-LABEL: test_mm_ceil_ps - // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %{{.*}}, i32 2) + // CHECK: %{{.*}} = call <8 x float> @llvm.ceil.v8f32(<8 x float> %{{.*}}) return _mm256_ceil_ps(x); } @@ -1095,13 +1095,13 @@ TEST_CONSTEXPR(match_m128i(_mm256_extractf128_si256(((__m256i){0ULL, 1ULL, 2ULL, __m256d test_mm256_floor_pd(__m256d x) { // CHECK-LABEL: test_mm256_floor_pd - // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %{{.*}}, i32 1) + // CHECK: %{{.*}} = call <4 x double> @llvm.floor.v4f64(<4 x double> %{{.*}}) return _mm256_floor_pd(x); } __m256 test_mm_floor_ps(__m256 x) { // CHECK-LABEL: test_mm_floor_ps - // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %{{.*}}, i32 1) + // CHECK: %{{.*}} = call <8 x float> @llvm.floor.v8f32(<8 x float> %{{.*}}) return _mm256_floor_ps(x); } @@ -1511,13 +1511,13 @@ __m256 test_mm256_rcp_ps(__m256 A) { __m256d 
test_mm256_round_pd(__m256d x) { // CHECK-LABEL: test_mm256_round_pd - // CHECK: call {{.*}}<4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %{{.*}}, i32 4) + // CHECK: %{{.*}} = call <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double> %{{.*}}, metadata !"fpexcept.strict") return _mm256_round_pd(x, 4); } __m256 test_mm256_round_ps(__m256 x) { // CHECK-LABEL: test_mm256_round_ps - // CHECK: call {{.*}}<8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %{{.*}}, i32 4) + // CHECK: %{{.*}} = call <8 x float> @llvm.experimental.constrained.floor.v8f32(<8 x float> %{{.*}}, metadata !"fpexcept.strict") return _mm256_round_ps(x, 4); } diff --git a/clang/test/CodeGen/X86/pr51324.c b/clang/test/CodeGen/X86/pr51324.c index 10d1ba3c84b85..de97183aa6613 100644 --- a/clang/test/CodeGen/X86/pr51324.c +++ b/clang/test/CodeGen/X86/pr51324.c @@ -9,7 +9,7 @@ // Make sure brackets work after macro intrinsics. float pr51324(__m128 a) { // CHECK-LABEL: pr51324 - // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 0) + // CHECK: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %{{.*}}) // CHECK: extractelement <4 x float> %{{.*}}, i32 0 return _mm_round_ps(a, 0)[0]; } diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 35fa65a99836b..9163b14a9fc11 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -75,25 +75,29 @@ TEST_CONSTEXPR(match_m128(_mm_blendv_ps((__m128)(__v4sf){0.0f, 1.0f, 2.0f, 3.0f} __m128d test_mm_ceil_pd(__m128d x) { // CHECK-LABEL: test_mm_ceil_pd - // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 2) + // CHECK: %{{.*}} = call <2 x double> @llvm.ceil.v2f64(<2 x double> %{{.*}}) return _mm_ceil_pd(x); } __m128 test_mm_ceil_ps(__m128 x) { // CHECK-LABEL: test_mm_ceil_ps - // CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 2) + // CHECK: %{{.*}} = call <4 x float> 
@llvm.ceil.v4f32(<4 x float> %{{.*}}) return _mm_ceil_ps(x); } __m128d test_mm_ceil_sd(__m128d x, __m128d y) { // CHECK-LABEL: test_mm_ceil_sd - // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 2) + // CHECK: %[[A:.*]] = extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: %[[B:.*]] = call double @llvm.ceil.f64(double %[[A:.*]]) + // CHECK: %{{.*}} = insertelement <2 x double> %0, double %[[B:.*]], i32 0 return _mm_ceil_sd(x, y); } __m128 test_mm_ceil_ss(__m128 x, __m128 y) { // CHECK-LABEL: test_mm_ceil_ss - // CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 2) + // CHECK: %[[A:.*]] = extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: %[[B:.*]] = call float @llvm.ceil.f32(float %[[A:.*]]) + // CHECK: %{{.*}} = insertelement <4 x float> %0, float %[[B:.*]], i32 0 return _mm_ceil_ss(x, y); } @@ -256,25 +260,29 @@ TEST_CONSTEXPR(_mm_extract_ps(((__m128){1.25f, 2.5f, 3.75f, 5.0f}), 6) == __buil __m128d test_mm_floor_pd(__m128d x) { // CHECK-LABEL: test_mm_floor_pd - // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 1) + // CHECK: %{{.*}} = call <2 x double> @llvm.floor.v2f64(<2 x double> %{{.*}}) return _mm_floor_pd(x); } __m128 test_mm_floor_ps(__m128 x) { // CHECK-LABEL: test_mm_floor_ps - // CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 1) + // CHECK: %{{.*}} = call <4 x float> @llvm.floor.v4f32(<4 x float> %{{.*}}) return _mm_floor_ps(x); } __m128d test_mm_floor_sd(__m128d x, __m128d y) { // CHECK-LABEL: test_mm_floor_sd - // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 1) + // CHECK: %[[A:.*]] = extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: %[[B:.*]] = call double @llvm.floor.f64(double %[[A:.*]]) + // CHECK: %{{.*}} = insertelement <2 x double> %0, double %[[B:.*]], i32 0 return _mm_floor_sd(x, y); } __m128 
test_mm_floor_ss(__m128 x, __m128 y) { // CHECK-LABEL: test_mm_floor_ss - // CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 1) + // CHECK: %[[A:.*]] = extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: %[[B:.*]] = call float @llvm.floor.f32(float %[[A:.*]]) + // CHECK: %{{.*}} = insertelement <4 x float> %0, float %[[B:.*]], i32 0 return _mm_floor_ss(x, y); } @@ -430,25 +438,29 @@ TEST_CONSTEXPR(match_v8hi(_mm_packus_epi32((__m128i)(__v4si){40000, -50000, 3276 __m128d test_mm_round_pd(__m128d x) { // CHECK-LABEL: test_mm_round_pd - // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 4) + // CHECK: %{{.*}} = call <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double> %{{.*}}, metadata !"fpexcept.strict") return _mm_round_pd(x, 4); } __m128 test_mm_round_ps(__m128 x) { // CHECK-LABEL: test_mm_round_ps - // CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 4) + // CHECK: %{{.*}} = call <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float> %{{.*}}, metadata !"fpexcept.strict") return _mm_round_ps(x, 4); } __m128d test_mm_round_sd(__m128d x, __m128d y) { // CHECK-LABEL: test_mm_round_sd - // CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 4) + // CHECK: %[[A:.*]] = extractelement <2 x double> %{{.*}}, i32 0 + // CHECK: %[[B:.*]] = call double @llvm.experimental.constrained.floor.f64(double %[[A:.*]], metadata !"fpexcept.strict") + // CHECK: %{{.*}} = insertelement <2 x double> %0, double %[[B:.*]], i32 0 return _mm_round_sd(x, y, 4); } __m128 test_mm_round_ss(__m128 x, __m128 y) { // CHECK-LABEL: test_mm_round_ss - // CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 4) + // CHECK: %[[A:.*]] = extractelement <4 x float> %{{.*}}, i32 0 + // CHECK: %[[B:.*]] = call float @llvm.experimental.constrained.floor.f32(float 
%[[A:.*]], metadata !"fpexcept.strict") + // CHECK: %{{.*}} = insertelement <4 x float> %0, float %[[B:.*]], i32 0 return _mm_round_ss(x, y, 4); } >From aab58a9b7309e3daa7b95ddc49da33ddebfef2cb Mon Sep 17 00:00:00 2001 From: stomfaig <[email protected]> Date: Sun, 7 Dec 2025 23:39:05 +0000 Subject: [PATCH 3/8] remove ClangBuiltin from ops --- llvm/include/llvm/IR/IntrinsicsX86.td | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 1dd23f60c7e1e..6369e97f807fb 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -626,17 +626,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // FP rounding ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_sse41_round_ss : ClangBuiltin<"__builtin_ia32_roundss">, - DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + def int_x86_sse41_round_ss : DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; - def int_x86_sse41_round_ps : ClangBuiltin<"__builtin_ia32_roundps">, - DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + def int_x86_sse41_round_ps : DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; - def int_x86_sse41_round_sd : ClangBuiltin<"__builtin_ia32_roundsd">, - DefaultAttrsIntrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + def int_x86_sse41_round_sd : DefaultAttrsIntrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; - def int_x86_sse41_round_pd : ClangBuiltin<"__builtin_ia32_roundpd">, - DefaultAttrsIntrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + def int_x86_sse41_round_pd : DefaultAttrsIntrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; } @@ -921,11 +917,9 @@ 
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_rcp_ps_256 : ClangBuiltin<"__builtin_ia32_rcpps256">, DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; - def int_x86_avx_round_pd_256 : ClangBuiltin<"__builtin_ia32_roundpd256">, - DefaultAttrsIntrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty], + def int_x86_avx_round_pd_256 : DefaultAttrsIntrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; - def int_x86_avx_round_ps_256 : ClangBuiltin<"__builtin_ia32_roundps256">, - DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty], + def int_x86_avx_round_ps_256 : DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; } >From 156d2aa3b72e90699885973f85aaacc0eb930435 Mon Sep 17 00:00:00 2001 From: stomfaig <[email protected]> Date: Mon, 8 Dec 2025 23:06:21 +0000 Subject: [PATCH 4/8] moving rounding functionality to helper --- clang/lib/CodeGen/TargetBuiltins/X86.cpp | 155 ++++++++++------------- 1 file changed, 69 insertions(+), 86 deletions(-) diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index a3e5c48629228..167ad4478e6b1 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -75,6 +75,70 @@ static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, return MaskVec; } +static Value *emitX86Round(CodeGenFunction &CGF, + Value *X, + unsigned M) { + unsigned RoundingMask = 0b11; + unsigned UpdatePEBit = 0b100; + unsigned UseMXCSRBit = 0b1000; + + unsigned roundingMode = M & RoundingMask; + bool updatePE = M & UpdatePEBit; + bool useMXCSR = M & UseMXCSRBit; + + Intrinsic::ID ID = Intrinsic::not_intrinsic; + LLVMContext &Ctx = CGF.CGM.getLLVMContext(); + + if (useMXCSR) { + ID = Intrinsic::experimental_constrained_nearbyint; + + auto PE_metatadata = updatePE ? 
"fpexcept.strict" : "fpexcept.ignore"; + + Value *ExceptMode = MetadataAsValue::get( + Ctx, + MDString::get(Ctx, PE_metatadata) + ); + + Value *RoundingMode = MetadataAsValue::get( + Ctx, + MDString::get(Ctx, "rounding.dynamic") + ); + + Function *F = CGF.CGM.getIntrinsic(ID, X->getType()); + return CGF.Builder.CreateCall(F, {X, ExceptMode, RoundingMode}); + } + + if (updatePE) { + switch (roundingMode) { + case 0b00: ID = Intrinsic::experimental_constrained_roundeven; break; + case 0b01: ID = Intrinsic::experimental_constrained_floor; break; + case 0b10: ID = Intrinsic::experimental_constrained_ceil; break; + case 0b11: ID = Intrinsic::experimental_constrained_trunc; break; + default: llvm_unreachable("Invalid rounding mode"); + } + + Value *ExceptMode =MetadataAsValue::get( + Ctx, + MDString::get(Ctx, "fpexcept.strict") + ); + + Function *F = CGF.CGM.getIntrinsic(ID, X->getType()); + return CGF.Builder.CreateCall(F, {X, ExceptMode}); + } + + // Otherwise we can use the standard ops + switch (roundingMode) { + case 0b00: ID = Intrinsic::roundeven; break; + case 0b01: ID = Intrinsic::floor; break; + case 0b10: ID = Intrinsic::ceil; break; + case 0b11: ID = Intrinsic::trunc; break; + default: llvm_unreachable("Invalid rounding mode"); + } + + Function *F = CGF.CGM.getIntrinsic(ID, X->getType()); + return CGF.Builder.CreateCall(F, {X}); +} + static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops, Align Alignment) { Value *Ptr = Ops[0]; @@ -843,100 +907,19 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_roundps: case X86::BI__builtin_ia32_roundpd: case X86::BI__builtin_ia32_roundps256: - case X86::BI__builtin_ia32_roundpd256: { + case X86::BI__builtin_ia32_roundpd256: { unsigned M = cast<ConstantInt>(Ops[1])->getZExtValue(); - unsigned roundingModeAndPE = M & 0b111; - unsigned updatePE = M & 0b100; - unsigned use_MXCSR = M & 0b1000; - - Intrinsic::ID ID; - - // Currently no ops for MXCSR bit set, so 
lower directly to SSE41 instruction - if (use_MXCSR) { - switch (BuiltinID) { - case X86::BI__builtin_ia32_roundps: ID = Intrinsic::x86_sse41_round_ps; break; - case X86::BI__builtin_ia32_roundpd: ID = Intrinsic::x86_sse41_round_pd; break; - } - return nullptr; - } else { - switch (roundingModeAndPE) { - default: return nullptr; - case 0b000: ID = Intrinsic::nearbyint; break; - case 0b001: ID = Intrinsic::floor; break; - case 0b010: ID = Intrinsic::ceil; break; - case 0b011: ID = Intrinsic::trunc; break; - case 0b100: ID = Intrinsic::experimental_constrained_floor; break; // TODO: replace with actual op - case 0b101: ID = Intrinsic::experimental_constrained_floor; break; - case 0b110: ID = Intrinsic::experimental_constrained_ceil; break; - case 0b111: ID = Intrinsic::experimental_constrained_trunc; break; - } - } - - Function *F = CGM.getIntrinsic(ID, Ops[0]->getType()); - - if (updatePE) { - LLVMContext &Ctx = CGM.getLLVMContext(); - - Value *ExceptMode =MetadataAsValue::get( - Ctx, - MDString::get(Ctx, "fpexcept.strict") - ); - - return Builder.CreateCall(F, {Ops[0], ExceptMode}); - } - - return Builder.CreateCall(F, {Ops[0]}); + return emitX86Round(*this, Ops[0], M); } case X86::BI__builtin_ia32_roundss: case X86::BI__builtin_ia32_roundsd: { unsigned M = cast<ConstantInt>(Ops[2])->getZExtValue(); - unsigned roundingModeAndPE = M & 0b111; - unsigned updatePE = M & 0b100; - unsigned use_MXCSR = M & 0b1000; - Intrinsic::ID ID; - - // Currently no ops for MXCSR bit set, so lower directly to SSE41 instruction - if (use_MXCSR) { - switch (BuiltinID) { - case X86::BI__builtin_ia32_roundss: ID = Intrinsic::x86_sse41_round_ss; break; - case X86::BI__builtin_ia32_roundsd: ID = Intrinsic::x86_sse41_round_sd; break; - } - return nullptr; - } else { - switch (roundingModeAndPE) { - default: return nullptr; - case 0b000: ID = Intrinsic::nearbyint; break; - case 0b001: ID = Intrinsic::floor; break; - case 0b010: ID = Intrinsic::ceil; break; - case 0b011: ID = Intrinsic::trunc; 
break; - case 0b100: ID = Intrinsic::experimental_constrained_floor; break; // TODO: replace with actual op - case 0b101: ID = Intrinsic::experimental_constrained_floor; break; - case 0b110: ID = Intrinsic::experimental_constrained_ceil; break; - case 0b111: ID = Intrinsic::experimental_constrained_trunc; break; - } - } - Value *idx = Builder.getInt32(0); - Value *b0 = Builder.CreateExtractElement(Ops[1], idx); - Value *rounded0; - - Function *F = CGM.getIntrinsic(ID, b0->getType()); - - if (updatePE) { - LLVMContext &Ctx = CGM.getLLVMContext(); - - Value *ExceptMode =MetadataAsValue::get( - Ctx, - MDString::get(Ctx, "fpexcept.strict") - ); - - rounded0 = Builder.CreateCall(F, {b0, ExceptMode}); - } else { - rounded0 = Builder.CreateCall(F, {b0}); - } + Value *ValAt0 = Builder.CreateExtractElement(Ops[1], idx); + Value *RoundedAt0 = emitX86Round(*this, ValAt0, M); - return Builder.CreateInsertElement(Ops[0], rounded0, idx); + return Builder.CreateInsertElement(Ops[0], RoundedAt0, idx); } case X86::BI__builtin_ia32_lzcnt_u16: case X86::BI__builtin_ia32_lzcnt_u32: >From 7ab45f89c6e3092b210ab2c12a24bd706b8de41c Mon Sep 17 00:00:00 2001 From: stomfaig <[email protected]> Date: Mon, 8 Dec 2025 23:06:41 +0000 Subject: [PATCH 5/8] update tests --- clang/test/CodeGen/X86/avx-builtins.c | 4 ++-- clang/test/CodeGen/X86/sse41-builtins.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 506327bc910c7..f3844adf0a498 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1511,13 +1511,13 @@ __m256 test_mm256_rcp_ps(__m256 A) { __m256d test_mm256_round_pd(__m256d x) { // CHECK-LABEL: test_mm256_round_pd - // CHECK: %{{.*}} = call <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double> %{{.*}}, metadata !"fpexcept.strict") + // CHECK: %{{.*}} = call <4 x double> @llvm.experimental.constrained.roundeven.v4f64(<4 x 
double> %{{.*}}, metadata !"fpexcept.strict") return _mm256_round_pd(x, 4); } __m256 test_mm256_round_ps(__m256 x) { // CHECK-LABEL: test_mm256_round_ps - // CHECK: %{{.*}} = call <8 x float> @llvm.experimental.constrained.floor.v8f32(<8 x float> %{{.*}}, metadata !"fpexcept.strict") + // CHECK: %{{.*}} = call <8 x float> @llvm.experimental.constrained.roundeven.v8f32(<8 x float> %{{.*}}, metadata !"fpexcept.strict") return _mm256_round_ps(x, 4); } diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 9163b14a9fc11..f084e1dfade15 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -438,20 +438,20 @@ TEST_CONSTEXPR(match_v8hi(_mm_packus_epi32((__m128i)(__v4si){40000, -50000, 3276 __m128d test_mm_round_pd(__m128d x) { // CHECK-LABEL: test_mm_round_pd - // CHECK: %{{.*}} = call <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double> %{{.*}}, metadata !"fpexcept.strict") + // CHECK: %{{.*}} = call <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double> %{{.*}}, metadata !"fpexcept.strict") return _mm_round_pd(x, 4); } __m128 test_mm_round_ps(__m128 x) { // CHECK-LABEL: test_mm_round_ps - // CHECK: %{{.*}} = call <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float> %{{.*}}, metadata !"fpexcept.strict") + // CHECK: %{{.*}} = call <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float> %{{.*}}, metadata !"fpexcept.strict") return _mm_round_ps(x, 4); } __m128d test_mm_round_sd(__m128d x, __m128d y) { // CHECK-LABEL: test_mm_round_sd // CHECK: %[[A:.*]] = extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: %[[B:.*]] = call double @llvm.experimental.constrained.floor.f64(double %[[A:.*]], metadata !"fpexcept.strict") + // CHECK: %[[B:.*]] = call double @llvm.experimental.constrained.roundeven.f64(double %[[A:.*]], metadata !"fpexcept.strict") // CHECK: %{{.*}} = insertelement <2 x double> %0, double %[[B:.*]], i32 0 
return _mm_round_sd(x, y, 4); } @@ -459,7 +459,7 @@ __m128d test_mm_round_sd(__m128d x, __m128d y) { __m128 test_mm_round_ss(__m128 x, __m128 y) { // CHECK-LABEL: test_mm_round_ss // CHECK: %[[A:.*]] = extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: %[[B:.*]] = call float @llvm.experimental.constrained.floor.f32(float %[[A:.*]], metadata !"fpexcept.strict") + // CHECK: %[[B:.*]] = call float @llvm.experimental.constrained.roundeven.f32(float %[[A:.*]], metadata !"fpexcept.strict") // CHECK: %{{.*}} = insertelement <4 x float> %0, float %[[B:.*]], i32 0 return _mm_round_ss(x, y, 4); } >From 62a18f9a34347c6243c15e0c627b1d74cfe916a7 Mon Sep 17 00:00:00 2001 From: stomfaig <[email protected]> Date: Mon, 8 Dec 2025 23:07:42 +0000 Subject: [PATCH 6/8] format --- clang/lib/CodeGen/TargetBuiltins/X86.cpp | 76 ++++++++++++++---------- llvm/include/llvm/IR/IntrinsicsX86.td | 32 ++++++---- 2 files changed, 63 insertions(+), 45 deletions(-) diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index 167ad4478e6b1..c8b55e855e717 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -75,52 +75,53 @@ static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, return MaskVec; } -static Value *emitX86Round(CodeGenFunction &CGF, - Value *X, - unsigned M) { +static Value *emitX86Round(CodeGenFunction &CGF, Value *X, unsigned M) { unsigned RoundingMask = 0b11; unsigned UpdatePEBit = 0b100; unsigned UseMXCSRBit = 0b1000; - + unsigned roundingMode = M & RoundingMask; bool updatePE = M & UpdatePEBit; bool useMXCSR = M & UseMXCSRBit; - + Intrinsic::ID ID = Intrinsic::not_intrinsic; LLVMContext &Ctx = CGF.CGM.getLLVMContext(); - + if (useMXCSR) { ID = Intrinsic::experimental_constrained_nearbyint; - + auto PE_metatadata = updatePE ? 
"fpexcept.strict" : "fpexcept.ignore"; - Value *ExceptMode = MetadataAsValue::get( - Ctx, - MDString::get(Ctx, PE_metatadata) - ); + Value *ExceptMode = + MetadataAsValue::get(Ctx, MDString::get(Ctx, PE_metatadata)); - Value *RoundingMode = MetadataAsValue::get( - Ctx, - MDString::get(Ctx, "rounding.dynamic") - ); + Value *RoundingMode = + MetadataAsValue::get(Ctx, MDString::get(Ctx, "rounding.dynamic")); Function *F = CGF.CGM.getIntrinsic(ID, X->getType()); return CGF.Builder.CreateCall(F, {X, ExceptMode, RoundingMode}); - } + } if (updatePE) { switch (roundingMode) { - case 0b00: ID = Intrinsic::experimental_constrained_roundeven; break; - case 0b01: ID = Intrinsic::experimental_constrained_floor; break; - case 0b10: ID = Intrinsic::experimental_constrained_ceil; break; - case 0b11: ID = Intrinsic::experimental_constrained_trunc; break; - default: llvm_unreachable("Invalid rounding mode"); + case 0b00: + ID = Intrinsic::experimental_constrained_roundeven; + break; + case 0b01: + ID = Intrinsic::experimental_constrained_floor; + break; + case 0b10: + ID = Intrinsic::experimental_constrained_ceil; + break; + case 0b11: + ID = Intrinsic::experimental_constrained_trunc; + break; + default: + llvm_unreachable("Invalid rounding mode"); } - Value *ExceptMode =MetadataAsValue::get( - Ctx, - MDString::get(Ctx, "fpexcept.strict") - ); + Value *ExceptMode = + MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.strict")); Function *F = CGF.CGM.getIntrinsic(ID, X->getType()); return CGF.Builder.CreateCall(F, {X, ExceptMode}); @@ -128,11 +129,20 @@ static Value *emitX86Round(CodeGenFunction &CGF, // Otherwise we can use the standard ops switch (roundingMode) { - case 0b00: ID = Intrinsic::roundeven; break; - case 0b01: ID = Intrinsic::floor; break; - case 0b10: ID = Intrinsic::ceil; break; - case 0b11: ID = Intrinsic::trunc; break; - default: llvm_unreachable("Invalid rounding mode"); + case 0b00: + ID = Intrinsic::roundeven; + break; + case 0b01: + ID = Intrinsic::floor; + 
break; + case 0b10: + ID = Intrinsic::ceil; + break; + case 0b11: + ID = Intrinsic::trunc; + break; + default: + llvm_unreachable("Invalid rounding mode"); } Function *F = CGF.CGM.getIntrinsic(ID, X->getType()); @@ -907,18 +917,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_roundps: case X86::BI__builtin_ia32_roundpd: case X86::BI__builtin_ia32_roundps256: - case X86::BI__builtin_ia32_roundpd256: { + case X86::BI__builtin_ia32_roundpd256: { unsigned M = cast<ConstantInt>(Ops[1])->getZExtValue(); return emitX86Round(*this, Ops[0], M); } case X86::BI__builtin_ia32_roundss: case X86::BI__builtin_ia32_roundsd: { unsigned M = cast<ConstantInt>(Ops[2])->getZExtValue(); - + Value *idx = Builder.getInt32(0); Value *ValAt0 = Builder.CreateExtractElement(Ops[1], idx); Value *RoundedAt0 = emitX86Round(*this, ValAt0, M); - + return Builder.CreateInsertElement(Ops[0], RoundedAt0, idx); } case X86::BI__builtin_ia32_lzcnt_u16: diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 6369e97f807fb..7838e410badd7 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -626,14 +626,20 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // FP rounding ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
- def int_x86_sse41_round_ss : DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; - def int_x86_sse41_round_ps : DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; - def int_x86_sse41_round_sd : DefaultAttrsIntrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; - def int_x86_sse41_round_pd : DefaultAttrsIntrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; + def int_x86_sse41_round_ss + : DefaultAttrsIntrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>]>; + def int_x86_sse41_round_ps + : DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + def int_x86_sse41_round_sd + : DefaultAttrsIntrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>]>; + def int_x86_sse41_round_pd + : DefaultAttrsIntrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; } // Vector min element @@ -917,10 +923,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_avx_rcp_ps_256 : ClangBuiltin<"__builtin_ia32_rcpps256">, DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; - def int_x86_avx_round_pd_256 : DefaultAttrsIntrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<ArgIndex<1>>]>; - def int_x86_avx_round_ps_256 : DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<ArgIndex<1>>]>; + def int_x86_avx_round_pd_256 + : DefaultAttrsIntrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + def int_x86_avx_round_ps_256 + : DefaultAttrsIntrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; } // Horizontal ops >From c4eff0d6076ebaeaaef481f4adc914bfc349ec4a Mon Sep 17 00:00:00 2001 From: stomfaig <[email protected]> Date: Tue, 9 Dec 2025 18:04:23 +0000 Subject: [PATCH 7/8] resolving comments --- clang/lib/CodeGen/TargetBuiltins/X86.cpp | 43 ++++-------------------- clang/test/CodeGen/X86/avx-builtins.c | 4 +-- clang/test/CodeGen/X86/sse41-builtins.c | 8 ++--- 3 files changed, 13 insertions(+), 42 deletions(-) diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index c8b55e855e717..d4c25cdc8b0ab 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -75,14 +75,13 @@ static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, return MaskVec; } -static Value *emitX86Round(CodeGenFunction &CGF, Value *X, unsigned M) { - unsigned RoundingMask = 0b11; - unsigned UpdatePEBit = 0b100; - unsigned UseMXCSRBit = 0b1000; +// Emit rounding for the value X according to the rounding RoundingControl. 
+static Value *emitX86Round(CodeGenFunction &CGF, Value *X, unsigned RoundingControl) { + unsigned roundingMask = 0b11; + unsigned useMXCSRBit = 0b1000; - unsigned roundingMode = M & RoundingMask; - bool updatePE = M & UpdatePEBit; - bool useMXCSR = M & UseMXCSRBit; + unsigned roundingMode = RoundingControl & roundingMask; + bool useMXCSR = RoundingControl & useMXCSRBit; Intrinsic::ID ID = Intrinsic::not_intrinsic; LLVMContext &Ctx = CGF.CGM.getLLVMContext(); @@ -90,10 +89,8 @@ static Value *emitX86Round(CodeGenFunction &CGF, Value *X, unsigned M) { if (useMXCSR) { ID = Intrinsic::experimental_constrained_nearbyint; - auto PE_metatadata = updatePE ? "fpexcept.strict" : "fpexcept.ignore"; - Value *ExceptMode = - MetadataAsValue::get(Ctx, MDString::get(Ctx, PE_metatadata)); + MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.ignore")); Value *RoundingMode = MetadataAsValue::get(Ctx, MDString::get(Ctx, "rounding.dynamic")); @@ -102,32 +99,6 @@ static Value *emitX86Round(CodeGenFunction &CGF, Value *X, unsigned M) { return CGF.Builder.CreateCall(F, {X, ExceptMode, RoundingMode}); } - if (updatePE) { - switch (roundingMode) { - case 0b00: - ID = Intrinsic::experimental_constrained_roundeven; - break; - case 0b01: - ID = Intrinsic::experimental_constrained_floor; - break; - case 0b10: - ID = Intrinsic::experimental_constrained_ceil; - break; - case 0b11: - ID = Intrinsic::experimental_constrained_trunc; - break; - default: - llvm_unreachable("Invalid rounding mode"); - } - - Value *ExceptMode = - MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.strict")); - - Function *F = CGF.CGM.getIntrinsic(ID, X->getType()); - return CGF.Builder.CreateCall(F, {X, ExceptMode}); - } - - // Otherwise we can use the standard ops switch (roundingMode) { case 0b00: ID = Intrinsic::roundeven; diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index f3844adf0a498..e6b8b57b8cb30 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ 
b/clang/test/CodeGen/X86/avx-builtins.c @@ -1511,13 +1511,13 @@ __m256 test_mm256_rcp_ps(__m256 A) { __m256d test_mm256_round_pd(__m256d x) { // CHECK-LABEL: test_mm256_round_pd - // CHECK: %{{.*}} = call <4 x double> @llvm.experimental.constrained.roundeven.v4f64(<4 x double> %{{.*}}, metadata !"fpexcept.strict") + // CHECK: %{{.*}} = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %{{.*}}) return _mm256_round_pd(x, 4); } __m256 test_mm256_round_ps(__m256 x) { // CHECK-LABEL: test_mm256_round_ps - // CHECK: %{{.*}} = call <8 x float> @llvm.experimental.constrained.roundeven.v8f32(<8 x float> %{{.*}}, metadata !"fpexcept.strict") + // CHECK: %{{.*}} = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %{{.*}}) return _mm256_round_ps(x, 4); } diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index f084e1dfade15..3d2eacfcb9287 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -438,20 +438,20 @@ TEST_CONSTEXPR(match_v8hi(_mm_packus_epi32((__m128i)(__v4si){40000, -50000, 3276 __m128d test_mm_round_pd(__m128d x) { // CHECK-LABEL: test_mm_round_pd - // CHECK: %{{.*}} = call <2 x double> @llvm.experimental.constrained.roundeven.v2f64(<2 x double> %{{.*}}, metadata !"fpexcept.strict") + // CHECK: %{{.*}} = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %{{.*}}) return _mm_round_pd(x, 4); } __m128 test_mm_round_ps(__m128 x) { // CHECK-LABEL: test_mm_round_ps - // CHECK: %{{.*}} = call <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float> %{{.*}}, metadata !"fpexcept.strict") + // CHECK: %{{.*}} = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %{{.*}}) return _mm_round_ps(x, 4); } __m128d test_mm_round_sd(__m128d x, __m128d y) { // CHECK-LABEL: test_mm_round_sd // CHECK: %[[A:.*]] = extractelement <2 x double> %{{.*}}, i32 0 - // CHECK: %[[B:.*]] = call double @llvm.experimental.constrained.roundeven.f64(double %[[A:.*]], metadata 
!"fpexcept.strict") + // CHECK: %[[B:.*]] = call double @llvm.roundeven.f64(double %[[A:.*]]) // CHECK: %{{.*}} = insertelement <2 x double> %0, double %[[B:.*]], i32 0 return _mm_round_sd(x, y, 4); } @@ -459,7 +459,7 @@ __m128d test_mm_round_sd(__m128d x, __m128d y) { __m128 test_mm_round_ss(__m128 x, __m128 y) { // CHECK-LABEL: test_mm_round_ss // CHECK: %[[A:.*]] = extractelement <4 x float> %{{.*}}, i32 0 - // CHECK: %[[B:.*]] = call float @llvm.experimental.constrained.roundeven.f32(float %[[A:.*]], metadata !"fpexcept.strict") + // CHECK: %[[B:.*]] = call float @llvm.roundeven.f32(float %[[A:.*]]) // CHECK: %{{.*}} = insertelement <4 x float> %0, float %[[B:.*]], i32 0 return _mm_round_ss(x, y, 4); } >From 9b2cda2ccaa0631133d0dee1e378951d1db09cce Mon Sep 17 00:00:00 2001 From: stomfaig <[email protected]> Date: Tue, 9 Dec 2025 18:04:39 +0000 Subject: [PATCH 8/8] format --- clang/lib/CodeGen/TargetBuiltins/X86.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/TargetBuiltins/X86.cpp b/clang/lib/CodeGen/TargetBuiltins/X86.cpp index d4c25cdc8b0ab..fc10f460e6dc4 100644 --- a/clang/lib/CodeGen/TargetBuiltins/X86.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/X86.cpp @@ -76,7 +76,8 @@ static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, } // Emit rounding for the value X according to the rounding RoundingControl. -static Value *emitX86Round(CodeGenFunction &CGF, Value *X, unsigned RoundingControl) { +static Value *emitX86Round(CodeGenFunction &CGF, Value *X, + unsigned RoundingControl) { unsigned roundingMask = 0b11; unsigned useMXCSRBit = 0b1000; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
