https://github.com/ficol created https://github.com/llvm/llvm-project/pull/166905
Currently only __builtin_elementwise_sqrt emits constrained fp intrinsic and propagates fp options. This commit adds this support for the rest of elementwise builtins. >From 4e1ffa02e4493ba5c84f64e534909479a9acb3ef Mon Sep 17 00:00:00 2001 From: "Ficek, Jakub" <[email protected]> Date: Fri, 7 Nov 2025 08:41:04 +0100 Subject: [PATCH] [clang] Support constrained fp elementwise builtins Currently only __builtin_elementwise_sqrt emits constrained fp intrinsic and propagates fp options. This commit adds this support for the rest of elementwise builtins. --- clang/lib/CodeGen/CGBuiltin.cpp | 107 ++++-------------- .../test/CodeGen/fp-contract-fast-pragma.cpp | 52 ++++++++- 2 files changed, 72 insertions(+), 87 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0a2ea416e5e4d..1da63e57ac8a1 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2727,6 +2727,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_acosf16: case Builtin::BI__builtin_acosl: case Builtin::BI__builtin_acosf128: + case Builtin::BI__builtin_elementwise_acos: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos)); @@ -2738,6 +2739,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_asinf16: case Builtin::BI__builtin_asinl: case Builtin::BI__builtin_asinf128: + case Builtin::BI__builtin_elementwise_asin: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin)); @@ -2749,6 +2751,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_atanf16: case Builtin::BI__builtin_atanl: case Builtin::BI__builtin_atanf128: + case Builtin::BI__builtin_elementwise_atan: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, 
Intrinsic::atan, Intrinsic::experimental_constrained_atan)); @@ -2760,6 +2763,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_atan2f16: case Builtin::BI__builtin_atan2l: case Builtin::BI__builtin_atan2f128: + case Builtin::BI__builtin_elementwise_atan2: return RValue::get(emitBinaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::atan2, Intrinsic::experimental_constrained_atan2)); @@ -2772,6 +2776,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_ceilf16: case Builtin::BI__builtin_ceill: case Builtin::BI__builtin_ceilf128: + case Builtin::BI__builtin_elementwise_ceil: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::ceil, Intrinsic::experimental_constrained_ceil)); @@ -2795,6 +2800,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_cosf16: case Builtin::BI__builtin_cosl: case Builtin::BI__builtin_cosf128: + case Builtin::BI__builtin_elementwise_cos: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::cos, Intrinsic::experimental_constrained_cos)); @@ -2807,6 +2813,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_coshf16: case Builtin::BI__builtin_coshl: case Builtin::BI__builtin_coshf128: + case Builtin::BI__builtin_elementwise_cosh: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh)); @@ -2818,6 +2825,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_expf16: case Builtin::BI__builtin_expl: case Builtin::BI__builtin_expf128: + case Builtin::BI__builtin_elementwise_exp: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::exp, Intrinsic::experimental_constrained_exp)); @@ -2830,6 +2838,7 @@ RValue 
CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_exp2f16: case Builtin::BI__builtin_exp2l: case Builtin::BI__builtin_exp2f128: + case Builtin::BI__builtin_elementwise_exp2: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::exp2, Intrinsic::experimental_constrained_exp2)); @@ -2838,6 +2847,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_exp10f16: case Builtin::BI__builtin_exp10l: case Builtin::BI__builtin_exp10f128: { + case Builtin::BI__builtin_elementwise_exp10: // TODO: strictfp support if (Builder.getIsFPConstrained()) break; @@ -2863,6 +2873,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_floorf16: case Builtin::BI__builtin_floorl: case Builtin::BI__builtin_floorf128: + case Builtin::BI__builtin_elementwise_floor: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::floor, Intrinsic::experimental_constrained_floor)); @@ -2875,6 +2886,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_fmaf16: case Builtin::BI__builtin_fmal: case Builtin::BI__builtin_fmaf128: + case Builtin::BI__builtin_elementwise_fma: return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::fma, Intrinsic::experimental_constrained_fma)); @@ -2950,6 +2962,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_logf16: case Builtin::BI__builtin_logl: case Builtin::BI__builtin_logf128: + case Builtin::BI__builtin_elementwise_log: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::log, Intrinsic::experimental_constrained_log)); @@ -2962,6 +2975,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_log10f16: case Builtin::BI__builtin_log10l: case 
Builtin::BI__builtin_log10f128: + case Builtin::BI__builtin_elementwise_log10: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::log10, Intrinsic::experimental_constrained_log10)); @@ -2974,6 +2988,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_log2f16: case Builtin::BI__builtin_log2l: case Builtin::BI__builtin_log2f128: + case Builtin::BI__builtin_elementwise_log2: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::log2, Intrinsic::experimental_constrained_log2)); @@ -2985,6 +3000,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_nearbyintf: case Builtin::BI__builtin_nearbyintl: case Builtin::BI__builtin_nearbyintf128: + case Builtin::BI__builtin_elementwise_nearbyint: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::nearbyint, Intrinsic::experimental_constrained_nearbyint)); @@ -2997,6 +3013,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_powf16: case Builtin::BI__builtin_powl: case Builtin::BI__builtin_powf128: + case Builtin::BI__builtin_elementwise_pow: return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::pow, Intrinsic::experimental_constrained_pow)); @@ -3009,6 +3026,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_rintf16: case Builtin::BI__builtin_rintl: case Builtin::BI__builtin_rintf128: + case Builtin::BI__builtin_elementwise_rint: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::rint, Intrinsic::experimental_constrained_rint)); @@ -3021,6 +3039,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_roundf16: case Builtin::BI__builtin_roundl: case Builtin::BI__builtin_roundf128: + case Builtin::BI__builtin_elementwise_round: 
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::round, Intrinsic::experimental_constrained_round)); @@ -3033,6 +3052,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_roundevenf16: case Builtin::BI__builtin_roundevenl: case Builtin::BI__builtin_roundevenf128: + case Builtin::BI__builtin_elementwise_roundeven: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::roundeven, Intrinsic::experimental_constrained_roundeven)); @@ -3045,6 +3065,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_sinf16: case Builtin::BI__builtin_sinl: case Builtin::BI__builtin_sinf128: + case Builtin::BI__builtin_elementwise_sin: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::sin, Intrinsic::experimental_constrained_sin)); @@ -3057,6 +3078,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_sinhf16: case Builtin::BI__builtin_sinhl: case Builtin::BI__builtin_sinhf128: + case Builtin::BI__builtin_elementwise_sinh: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh)); @@ -3104,6 +3126,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_tanf16: case Builtin::BI__builtin_tanl: case Builtin::BI__builtin_tanf128: + case Builtin::BI__builtin_elementwise_tan: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan)); @@ -3115,6 +3138,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_tanhf16: case Builtin::BI__builtin_tanhl: case Builtin::BI__builtin_tanhf128: + case Builtin::BI__builtin_elementwise_tanh: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::tanh, 
Intrinsic::experimental_constrained_tanh)); @@ -3126,6 +3150,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_truncf16: case Builtin::BI__builtin_truncl: case Builtin::BI__builtin_truncf128: + case Builtin::BI__builtin_elementwise_trunc: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::trunc, Intrinsic::experimental_constrained_trunc)); @@ -3964,100 +3989,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Result); } - case Builtin::BI__builtin_elementwise_acos: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::acos, "elt.acos")); - case Builtin::BI__builtin_elementwise_asin: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::asin, "elt.asin")); - case Builtin::BI__builtin_elementwise_atan: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::atan, "elt.atan")); - case Builtin::BI__builtin_elementwise_atan2: - return RValue::get(emitBuiltinWithOneOverloadedType<2>( - *this, E, Intrinsic::atan2, "elt.atan2")); - case Builtin::BI__builtin_elementwise_ceil: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::ceil, "elt.ceil")); - case Builtin::BI__builtin_elementwise_exp: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::exp, "elt.exp")); - case Builtin::BI__builtin_elementwise_exp2: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::exp2, "elt.exp2")); - case Builtin::BI__builtin_elementwise_exp10: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::exp10, "elt.exp10")); - case Builtin::BI__builtin_elementwise_ldexp: { - Value *Src = EmitScalarExpr(E->getArg(0)); - Value *Exp = EmitScalarExpr(E->getArg(1)); - Value *Result = Builder.CreateLdexp(Src, Exp, {}, "elt.ldexp"); - return RValue::get(Result); - } - case 
Builtin::BI__builtin_elementwise_log: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::log, "elt.log")); - case Builtin::BI__builtin_elementwise_log2: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::log2, "elt.log2")); - case Builtin::BI__builtin_elementwise_log10: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::log10, "elt.log10")); - case Builtin::BI__builtin_elementwise_pow: { - return RValue::get( - emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::pow)); - } case Builtin::BI__builtin_elementwise_bitreverse: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, Intrinsic::bitreverse, "elt.bitreverse")); - case Builtin::BI__builtin_elementwise_cos: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::cos, "elt.cos")); - case Builtin::BI__builtin_elementwise_cosh: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::cosh, "elt.cosh")); - case Builtin::BI__builtin_elementwise_floor: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::floor, "elt.floor")); case Builtin::BI__builtin_elementwise_popcount: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, Intrinsic::ctpop, "elt.ctpop")); - case Builtin::BI__builtin_elementwise_roundeven: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::roundeven, "elt.roundeven")); - case Builtin::BI__builtin_elementwise_round: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::round, "elt.round")); - case Builtin::BI__builtin_elementwise_rint: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::rint, "elt.rint")); - case Builtin::BI__builtin_elementwise_nearbyint: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::nearbyint, "elt.nearbyint")); - case Builtin::BI__builtin_elementwise_sin: - return 
RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::sin, "elt.sin")); - case Builtin::BI__builtin_elementwise_sinh: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::sinh, "elt.sinh")); - case Builtin::BI__builtin_elementwise_tan: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::tan, "elt.tan")); - case Builtin::BI__builtin_elementwise_tanh: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::tanh, "elt.tanh")); - case Builtin::BI__builtin_elementwise_trunc: - return RValue::get(emitBuiltinWithOneOverloadedType<1>( - *this, E, Intrinsic::trunc, "elt.trunc")); case Builtin::BI__builtin_elementwise_canonicalize: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, Intrinsic::canonicalize, "elt.canonicalize")); case Builtin::BI__builtin_elementwise_copysign: return RValue::get( emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign)); - case Builtin::BI__builtin_elementwise_fma: - return RValue::get( - emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fma)); case Builtin::BI__builtin_elementwise_fshl: return RValue::get( emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fshl)); diff --git a/clang/test/CodeGen/fp-contract-fast-pragma.cpp b/clang/test/CodeGen/fp-contract-fast-pragma.cpp index 0bb01d6e17a1d..491590d4ce384 100644 --- a/clang/test/CodeGen/fp-contract-fast-pragma.cpp +++ b/clang/test/CodeGen/fp-contract-fast-pragma.cpp @@ -89,17 +89,59 @@ float fp_contract_6(float a, float b, float c) { #pragma clang fp contract(fast) -float fp_contract_7(float a) { +float fp_contract_7(float a, float b, float c) { // COMMON: _Z13fp_contract_7f // CHECK: tail call contract float @llvm.sqrt.f32(float %a) +// CHECK: tail call contract float @llvm.ceil.f32(float %a) +// CHECK: tail call contract float @llvm.sin.f32(float %a) +// CHECK: tail call contract float @llvm.pow.f32(float %a, float %b) +// CHECK: tail call contract float 
@llvm.exp.f32(float %a) +// CHECK: tail call contract float @llvm.fma.f32(float %a, float %b, float %c) +// CHECK: tail call contract float @llvm.nearbyint.f32(float %a) +// CHECK: tail call contract float @llvm.log2.f32(float %a) // STRICT: tail call contract float @llvm.experimental.constrained.sqrt.f32(float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") - return __builtin_sqrtf(a); +// STRICT: tail call contract float @llvm.experimental.constrained.ceil.f32(float %a, metadata !"fpexcept.strict") +// STRICT: tail call contract float @llvm.experimental.constrained.sin.f32(float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") +// STRICT: tail call contract float @llvm.experimental.constrained.pow.f32(float %a, float %b, metadata !"round.tonearest", metadata !"fpexcept.strict") +// STRICT: tail call contract float @llvm.experimental.constrained.exp.f32(float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") +// STRICT: tail call contract float @llvm.experimental.constrained.fma.f32(float %a, float %b, float %c, metadata !"round.tonearest", metadata !"fpexcept.strict") +// STRICT: tail call contract float @llvm.experimental.constrained.nearbyint.f32(float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") +// STRICT: tail call contract float @llvm.experimental.constrained.log2.f32(float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") + return __builtin_sqrtf(a) + + __builtin_elementwise_ceil(a) + + __builtin_elementwise_sin(a) + + __builtin_elementwise_pow(a, b) + + __builtin_elementwise_exp(a) + + __builtin_elementwise_fma(a, b, c) + + __builtin_elementwise_nearbyint(a) + + __builtin_elementwise_log2(a); } -float fp_contract_8(float a) { +float fp_contract_8(float a, float b, float c) { // COMMON: _Z13fp_contract_8f // CHECK: tail call float @llvm.sqrt.f32(float %a) +// CHECK: tail call float @llvm.ceil.f32(float %a) +// CHECK: tail call float @llvm.sin.f32(float %a) +// CHECK: tail call float 
@llvm.pow.f32(float %a, float %b) +// CHECK: tail call float @llvm.exp.f32(float %a) +// CHECK: tail call float @llvm.fma.f32(float %a, float %b, float %c) +// CHECK: tail call float @llvm.nearbyint.f32(float %a) +// CHECK: tail call float @llvm.log2.f32(float %a) // STRICT: tail call float @llvm.experimental.constrained.sqrt.f32(float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") -#pragma clang fp contract(off) - return __builtin_sqrtf(a); +// STRICT: tail call float @llvm.experimental.constrained.ceil.f32(float %a, metadata !"fpexcept.strict") +// STRICT: tail call float @llvm.experimental.constrained.sin.f32(float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") +// STRICT: tail call float @llvm.experimental.constrained.pow.f32(float %a, float %b, metadata !"round.tonearest", metadata !"fpexcept.strict") +// STRICT: tail call float @llvm.experimental.constrained.exp.f32(float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") +// STRICT: tail call float @llvm.experimental.constrained.fma.f32(float %a, float %b, float %c, metadata !"round.tonearest", metadata !"fpexcept.strict") +// STRICT: tail call float @llvm.experimental.constrained.nearbyint.f32(float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") +// STRICT: tail call float @llvm.experimental.constrained.log2.f32(float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") +#pragma clang fp contract(off) + return __builtin_sqrtf(a) + + __builtin_elementwise_ceil(a) + + __builtin_elementwise_sin(a) + + __builtin_elementwise_pow(a, b) + + __builtin_elementwise_exp(a) + + __builtin_elementwise_fma(a, b, c) + + __builtin_elementwise_nearbyint(a) + + __builtin_elementwise_log2(a); } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
