https://github.com/wzssyqa created https://github.com/llvm/llvm-project/pull/181084
Currently, RISC-V claims that it has VP_FMAXNUM and VP_FMINNUM. This is not correct: they should be VP_FMAXIMUMNUM and VP_FMINIMUMNUM. >From 482b39fd79365abedb51867c24f8e952468476f0 Mon Sep 17 00:00:00 2001 From: YunQiang Su <[email protected]> Date: Wed, 11 Feb 2026 13:12:50 +0800 Subject: [PATCH 1/4] Clang: Drop float support from __builtin_elementwise_max Now we have __builtin_elementwise_maxnum __builtin_elementwise_maximum __builtin_elementwise_maximumnum --- clang/docs/LanguageExtensions.rst | 12 +--- clang/docs/ReleaseNotes.rst | 3 + clang/lib/CodeGen/CGBuiltin.cpp | 32 ++++----- .../test/CodeGen/builtins-elementwise-math.c | 70 +------------------ .../CodeGen/strictfp-elementwise-builtins.cpp | 24 +++---- 5 files changed, 33 insertions(+), 108 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 29328355c3e6f..745000e79027c 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -839,16 +839,8 @@ of different sizes and signs is forbidden in binary and ternary builtins. T __builtin_elementwise_copysign(T x, T y) return the magnitude of x with the sign of y. floating point types T __builtin_elementwise_fmod(T x, T y) return the floating-point remainder of (x/y) whose sign floating point types matches the sign of x. - T __builtin_elementwise_max(T x, T y) return x or y, whichever is larger integer and floating point types - For floating point types, follows semantics of maxNum - in IEEE 754-2008. See `LangRef - <http://llvm.org/docs/LangRef.html#i-fminmax-family>`_ - for the comparison. - T __builtin_elementwise_min(T x, T y) return x or y, whichever is smaller integer and floating point types - For floating point types, follows semantics of minNum - in IEEE 754-2008. See `LangRef - <http://llvm.org/docs/LangRef.html#i-fminmax-family>`_ - for the comparison. 
+ T __builtin_elementwise_max(T x, T y) return x or y, whichever is larger integer types + T __builtin_elementwise_min(T x, T y) return x or y, whichever is smaller integer types T __builtin_elementwise_maxnum(T x, T y) return x or y, whichever is larger. Follows IEEE 754-2008 floating point types semantics (maxNum) with +0.0>-0.0. See `LangRef <http://llvm.org/docs/LangRef.html#i-fminmax-family>`_ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 0dbea8efc2642..758982d6e6431 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -138,6 +138,9 @@ Non-comprehensive list of changes in this release Usable in constant expressions. Implicit conversion is supported for class/struct types with conversion operators. +- Removed float types support from ``__builtin_elementwise_max`` and + ``__builtin_elementwise_min``. + New Compiler Flags ------------------ - New option ``-fms-anonymous-structs`` / ``-fno-ms-anonymous-structs`` added diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index cf686581240a5..bb66677fb40c9 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4066,30 +4066,26 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Result; - if (Op0->getType()->isIntOrIntVectorTy()) { - QualType Ty = E->getArg(0)->getType(); - if (auto *VecTy = Ty->getAs<VectorType>()) - Ty = VecTy->getElementType(); - Result = Builder.CreateBinaryIntrinsic( - Ty->isSignedIntegerType() ? 
Intrinsic::smax : Intrinsic::umax, Op0, - Op1, nullptr, "elt.max"); - } else - Result = Builder.CreateMaxNum(Op0, Op1, /*FMFSource=*/nullptr, "elt.max"); + assert(Op0->getType()->isIntOrIntVectorTy()); + QualType Ty = E->getArg(0)->getType(); + if (auto *VecTy = Ty->getAs<VectorType>()) + Ty = VecTy->getElementType(); + Result = Builder.CreateBinaryIntrinsic( + Ty->isSignedIntegerType() ? Intrinsic::smax : Intrinsic::umax, Op0, + Op1, nullptr, "elt.max"); return RValue::get(Result); } case Builtin::BI__builtin_elementwise_min: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Result; - if (Op0->getType()->isIntOrIntVectorTy()) { - QualType Ty = E->getArg(0)->getType(); - if (auto *VecTy = Ty->getAs<VectorType>()) - Ty = VecTy->getElementType(); - Result = Builder.CreateBinaryIntrinsic( - Ty->isSignedIntegerType() ? Intrinsic::smin : Intrinsic::umin, Op0, - Op1, nullptr, "elt.min"); - } else - Result = Builder.CreateMinNum(Op0, Op1, /*FMFSource=*/nullptr, "elt.min"); + assert(Op0->getType()->isIntOrIntVectorTy()); + QualType Ty = E->getArg(0)->getType(); + if (auto *VecTy = Ty->getAs<VectorType>()) + Ty = VecTy->getElementType(); + Result = Builder.CreateBinaryIntrinsic( + Ty->isSignedIntegerType() ? 
Intrinsic::smin : Intrinsic::umin, Op0, + Op1, nullptr, "elt.min"); return RValue::get(Result); } diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c index 2df485f0155c3..a201403e8b6b1 100644 --- a/clang/test/CodeGen/builtins-elementwise-math.c +++ b/clang/test/CodeGen/builtins-elementwise-math.c @@ -339,32 +339,10 @@ void test_builtin_elementwise_minimum(float f1, float f2, double d1, double d2, vf1 = __builtin_elementwise_minimum(vf2, cvf1); } -void test_builtin_elementwise_max(float f1, float f2, double d1, double d2, - float4 vf1, float4 vf2, long long int i1, - long long int i2, si8 vi1, si8 vi2, +void test_builtin_elementwise_max(long long int i2, si8 vi1, si8 vi2, long long int i1, unsigned u1, unsigned u2, u4 vu1, u4 vu2, _BitInt(31) bi1, _BitInt(31) bi2, unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) { - // CHECK-LABEL: define void @test_builtin_elementwise_max( - // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4 - // CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4 - // CHECK-NEXT: call float @llvm.maxnum.f32(float [[F1]], float [[F2]]) - f1 = __builtin_elementwise_max(f1, f2); - - // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8 - // CHECK-NEXT: [[D2:%.+]] = load double, ptr %d2.addr, align 8 - // CHECK-NEXT: call double @llvm.maxnum.f64(double [[D1]], double [[D2]]) - d1 = __builtin_elementwise_max(d1, d2); - - // CHECK: [[D2:%.+]] = load double, ptr %d2.addr, align 8 - // CHECK-NEXT: call double @llvm.maxnum.f64(double 2.000000e+01, double [[D2]]) - d1 = __builtin_elementwise_max(20.0, d2); - - // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16 - // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16 - // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]]) - vf1 = __builtin_elementwise_max(vf1, vf2); - // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8 // CHECK-NEXT: [[I2:%.+]] = load i64, ptr 
%i2.addr, align 8 // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 [[I2]]) @@ -403,17 +381,6 @@ void test_builtin_elementwise_max(float f1, float f2, double d1, double d2, // CHECK-NEXT: call i55 @llvm.umax.i55(i55 [[LOADEDV2]], i55 [[LOADEDV3]]) bu1 = __builtin_elementwise_max(bu1, bu2); - // CHECK: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16 - // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16 - // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]]) - const float4 cvf1 = vf1; - vf1 = __builtin_elementwise_max(cvf1, vf2); - - // CHECK: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16 - // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16 - // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]]) - vf1 = __builtin_elementwise_max(vf2, cvf1); - // CHECK: [[IAS1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4 // CHECK-NEXT: [[B:%.+]] = load i32, ptr @b, align 4 // CHECK-NEXT: call i32 @llvm.smax.i32(i32 [[IAS1]], i32 [[B]]) @@ -423,32 +390,10 @@ void test_builtin_elementwise_max(float f1, float f2, double d1, double d2, i1 = __builtin_elementwise_max(1, 'a'); } -void test_builtin_elementwise_min(float f1, float f2, double d1, double d2, - float4 vf1, float4 vf2, long long int i1, - long long int i2, si8 vi1, si8 vi2, +void test_builtin_elementwise_min(long long int i2, si8 vi1, si8 vi2, long long int i1, unsigned u1, unsigned u2, u4 vu1, u4 vu2, _BitInt(31) bi1, _BitInt(31) bi2, unsigned _BitInt(55) bu1, unsigned _BitInt(55) bu2) { - // CHECK-LABEL: define void @test_builtin_elementwise_min( - // CHECK: [[F1:%.+]] = load float, ptr %f1.addr, align 4 - // CHECK-NEXT: [[F2:%.+]] = load float, ptr %f2.addr, align 4 - // CHECK-NEXT: call float @llvm.minnum.f32(float [[F1]], float [[F2]]) - f1 = __builtin_elementwise_min(f1, f2); - - // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8 - // CHECK-NEXT: [[D2:%.+]] = load double, 
ptr %d2.addr, align 8 - // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double [[D2]]) - d1 = __builtin_elementwise_min(d1, d2); - - // CHECK: [[D1:%.+]] = load double, ptr %d1.addr, align 8 - // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double 2.000000e+00) - d1 = __builtin_elementwise_min(d1, 2.0); - - // CHECK: [[VF1:%.+]] = load <4 x float>, ptr %vf1.addr, align 16 - // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16 - // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]]) - vf1 = __builtin_elementwise_min(vf1, vf2); - // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr, align 8 // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr, align 8 // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[I1]], i64 [[I2]]) @@ -494,17 +439,6 @@ void test_builtin_elementwise_min(float f1, float f2, double d1, double d2, // CHECK-NEXT: call i55 @llvm.umin.i55(i55 [[LOADEDV2]], i55 [[LOADEDV3]]) bu1 = __builtin_elementwise_min(bu1, bu2); - // CHECK: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16 - // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16 - // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]]) - const float4 cvf1 = vf1; - vf1 = __builtin_elementwise_min(cvf1, vf2); - - // CHECK: [[VF2:%.+]] = load <4 x float>, ptr %vf2.addr, align 16 - // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, ptr %cvf1, align 16 - // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]]) - vf1 = __builtin_elementwise_min(vf2, cvf1); - // CHECK: [[IAS1:%.+]] = load i32, ptr addrspace(1) @int_as_one, align 4 // CHECK-NEXT: [[B:%.+]] = load i32, ptr @b, align 4 // CHECK-NEXT: call i32 @llvm.smin.i32(i32 [[IAS1]], i32 [[B]]) diff --git a/clang/test/CodeGen/strictfp-elementwise-builtins.cpp b/clang/test/CodeGen/strictfp-elementwise-builtins.cpp index 6453d50f044aa..7de0a396e08f9 100644 --- 
a/clang/test/CodeGen/strictfp-elementwise-builtins.cpp +++ b/clang/test/CodeGen/strictfp-elementwise-builtins.cpp @@ -27,24 +27,24 @@ float4 strict_elementwise_abs(float4 a) { return __builtin_elementwise_abs(a); } -// CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_maxDv4_fS_ -// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-LABEL: define dso_local noundef <4 x float> @_Z25strict_elementwise_maxnumDv4_fS_ +// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_MAX:%.*]] = tail call <4 x float> @llvm.experimental.constrained.maxnum.v4f32(<4 x float> [[A]], <4 x float> [[B]], metadata !"fpexcept.strict") #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_MAX]] +// CHECK-NEXT: [[ELT_MAXNUM:%.*]] = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A]], <4 x float> [[B]]) #[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[ELT_MAXNUM]] // -float4 strict_elementwise_max(float4 a, float4 b) { - return __builtin_elementwise_max(a, b); +float4 strict_elementwise_maxnum(float4 a, float4 b) { + return __builtin_elementwise_maxnum(a, b); } -// CHECK-LABEL: define dso_local noundef <4 x float> @_Z22strict_elementwise_minDv4_fS_ -// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-LABEL: define dso_local noundef <4 x float> @_Z25strict_elementwise_minnumDv4_fS_ +// CHECK-SAME: (<4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[ELT_MIN:%.*]] = tail call <4 x float> @llvm.experimental.constrained.minnum.v4f32(<4 x float> [[A]], <4 x float> [[B]], metadata !"fpexcept.strict") #[[ATTR4]] -// CHECK-NEXT: ret <4 x float> [[ELT_MIN]] +// CHECK-NEXT: [[ELT_MINNUM:%.*]] = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> [[A]], <4 x float> [[B]]) 
#[[ATTR4]] +// CHECK-NEXT: ret <4 x float> [[ELT_MINNUM]] // -float4 strict_elementwise_min(float4 a, float4 b) { - return __builtin_elementwise_min(a, b); +float4 strict_elementwise_minnum(float4 a, float4 b) { + return __builtin_elementwise_minnum(a, b); } // CHECK-LABEL: define dso_local noundef <4 x float> @_Z26strict_elementwise_maximumDv4_fS_ >From 0ec494c401ca2630a4b30d82bb2041c19df0e92a Mon Sep 17 00:00:00 2001 From: YunQiang Su <[email protected]> Date: Wed, 11 Feb 2026 13:27:21 +0800 Subject: [PATCH 2/4] fix code format --- clang/lib/CodeGen/CGBuiltin.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index bb66677fb40c9..3daf08771c7a0 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4071,8 +4071,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, if (auto *VecTy = Ty->getAs<VectorType>()) Ty = VecTy->getElementType(); Result = Builder.CreateBinaryIntrinsic( - Ty->isSignedIntegerType() ? Intrinsic::smax : Intrinsic::umax, Op0, - Op1, nullptr, "elt.max"); + Ty->isSignedIntegerType() ? Intrinsic::smax : Intrinsic::umax, Op0, Op1, + nullptr, "elt.max"); return RValue::get(Result); } case Builtin::BI__builtin_elementwise_min: { @@ -4084,8 +4084,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, if (auto *VecTy = Ty->getAs<VectorType>()) Ty = VecTy->getElementType(); Result = Builder.CreateBinaryIntrinsic( - Ty->isSignedIntegerType() ? Intrinsic::smin : Intrinsic::umin, Op0, - Op1, nullptr, "elt.min"); + Ty->isSignedIntegerType() ? 
Intrinsic::smin : Intrinsic::umin, Op0, Op1, + nullptr, "elt.min"); return RValue::get(Result); } >From 52afce43f65c356e3d21073495e26ff96fcec6da Mon Sep 17 00:00:00 2001 From: YunQiang Su <[email protected]> Date: Wed, 11 Feb 2026 23:41:28 +0800 Subject: [PATCH 3/4] update test cases --- .../CodeGenHLSL/builtins/max-overloads.hlsl | 35 +------------ clang/test/CodeGenHLSL/builtins/max.hlsl | 49 +---------------- .../CodeGenHLSL/builtins/min-overloads.hlsl | 35 +------------ clang/test/CodeGenHLSL/builtins/min.hlsl | 52 +------------------ .../vec-scalar-compat-overload-warnings.hlsl | 20 ------- 5 files changed, 4 insertions(+), 187 deletions(-) diff --git a/clang/test/CodeGenHLSL/builtins/max-overloads.hlsl b/clang/test/CodeGenHLSL/builtins/max-overloads.hlsl index da5cd8ff37510..6a99f2dee6c1f 100644 --- a/clang/test/CodeGenHLSL/builtins/max-overloads.hlsl +++ b/clang/test/CodeGenHLSL/builtins/max-overloads.hlsl @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ // RUN: -fnative-half-type -fnative-int16-type -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,NATIVE_HALF // RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ -// RUN: -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF +// RUN: -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK #ifdef __HLSL_ENABLE_16_BIT // NATIVE_HALF-LABEL: define hidden noundef <4 x i16> {{.*}}test_max_short4_mismatch @@ -46,36 +46,3 @@ int64_t4 test_max_long4_mismatch(int64_t4 p0, int64_t p1) { return max(p0, p1); // CHECK: [[MAX:%.*]] = call noundef <4 x i64> @llvm.umax.v4i64(<4 x i64> %{{.*}}, <4 x i64> [[CONV1]]) // CHECK: ret <4 x i64> [[MAX]] uint64_t4 test_max_ulong4_mismatch(uint64_t4 p0, uint64_t p1) { return max(p0, p1); } - -// NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) <4 x half> {{.*}}test_max_half4_mismatch -// NATIVE_HALF: [[CONV0:%.*]] = insertelement <4 x half> 
poison, half %{{.*}}, i64 0 -// NATIVE_HALF: [[CONV1:%.*]] = shufflevector <4 x half> [[CONV0]], <4 x half> poison, <4 x i32> zeroinitializer -// NATIVE_HALF: [[MAX:%.*]] = call reassoc nnan ninf nsz arcp afn noundef nofpclass(nan inf) <4 x half> @llvm.maxnum.v4f16(<4 x half> %{{.*}}, <4 x half> [[CONV1]]) -// NATIVE_HALF: ret <4 x half> [[MAX]] -// NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_max_half4_mismatch -// NO_HALF: [[CONV0:%.*]] = insertelement <4 x float> poison, float %{{.*}}, i64 0 -// NO_HALF: [[CONV1:%.*]] = shufflevector <4 x float> [[CONV0]], <4 x float> poison, <4 x i32> zeroinitializer -// NO_HALF: [[MAX:%.*]] = call reassoc nnan ninf nsz arcp afn noundef nofpclass(nan inf) <4 x float> @llvm.maxnum.v4f32(<4 x float> %{{.*}}, <4 x float> [[CONV1]]) -// NO_HALF: ret <4 x float> [[MAX]] -half4 test_max_half4_mismatch(half4 p0, half p1) { return max(p0, p1); } - -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_max_float4_mismatch -// CHECK: [[CONV0:%.*]] = insertelement <4 x float> poison, float %{{.*}}, i64 0 -// CHECK: [[CONV1:%.*]] = shufflevector <4 x float> [[CONV0]], <4 x float> poison, <4 x i32> zeroinitializer -// CHECK: [[MAX:%.*]] = call reassoc nnan ninf nsz arcp afn noundef nofpclass(nan inf) <4 x float> @llvm.maxnum.v4f32(<4 x float> %{{.*}}, <4 x float> [[CONV1]]) -// CHECK: ret <4 x float> [[MAX]] -float4 test_max_float4_mismatch(float4 p0, float p1) { return max(p0, p1); } - -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x double> {{.*}}test_max_double4_mismatch -// CHECK: [[CONV0:%.*]] = insertelement <4 x double> poison, double %{{.*}}, i64 0 -// CHECK: [[CONV1:%.*]] = shufflevector <4 x double> [[CONV0]], <4 x double> poison, <4 x i32> zeroinitializer -// CHECK: [[MAX:%.*]] = call reassoc nnan ninf nsz arcp afn noundef nofpclass(nan inf) <4 x double> @llvm.maxnum.v4f64(<4 x double> %{{.*}}, <4 x double> [[CONV1]]) -// CHECK: ret <4 x double> [[MAX]] 
-double4 test_max_double4_mismatch(double4 p0, double p1) { return max(p0, p1); } - -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x double> {{.*}}test_max_double4_mismatch2 -// CHECK: [[CONV0:%.*]] = insertelement <4 x double> poison, double %{{.*}}, i64 0 -// CHECK: [[CONV1:%.*]] = shufflevector <4 x double> [[CONV0]], <4 x double> poison, <4 x i32> zeroinitializer -// CHECK: [[MAX:%.*]] = call reassoc nnan ninf nsz arcp afn noundef nofpclass(nan inf) <4 x double> @llvm.maxnum.v4f64(<4 x double> [[CONV1]], <4 x double> %{{.*}}) -// CHECK: ret <4 x double> [[MAX]] -double4 test_max_double4_mismatch2(double4 p0, double p1) { return max(p1, p0); } diff --git a/clang/test/CodeGenHLSL/builtins/max.hlsl b/clang/test/CodeGenHLSL/builtins/max.hlsl index 9c621e62b5336..266859710947f 100644 --- a/clang/test/CodeGenHLSL/builtins/max.hlsl +++ b/clang/test/CodeGenHLSL/builtins/max.hlsl @@ -3,7 +3,7 @@ // RUN: FileCheck %s --check-prefixes=CHECK,NATIVE_HALF // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ // RUN: -emit-llvm -disable-llvm-passes -o - | \ -// RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF +// RUN: FileCheck %s --check-prefixes=CHECK #ifdef __HLSL_ENABLE_16_BIT // NATIVE_HALF-LABEL: define hidden noundef i16 @_Z14test_max_short @@ -84,50 +84,3 @@ uint64_t3 test_max_ulong3(uint64_t3 p0, uint64_t3 p1) { return max(p0, p1); } // CHECK-LABEL: define hidden noundef <4 x i64> @_Z15test_max_ulong4 // CHECK: call <4 x i64> @llvm.umax.v4i64 uint64_t4 test_max_ulong4(uint64_t4 p0, uint64_t4 p1) { return max(p0, p1); } - -// NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) half @_Z13test_max_half -// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.maxnum.f16( -// NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) float @_Z13test_max_half -// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.maxnum.f32( -half test_max_half(half p0, half p1) { return max(p0, p1); } -// NATIVE_HALF-LABEL: 
define hidden noundef nofpclass(nan inf) <2 x half> @_Z14test_max_half2 -// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.maxnum.v2f16 -// NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> @_Z14test_max_half2 -// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.maxnum.v2f32( -half2 test_max_half2(half2 p0, half2 p1) { return max(p0, p1); } -// NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) <3 x half> @_Z14test_max_half3 -// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.maxnum.v3f16 -// NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> @_Z14test_max_half3 -// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.maxnum.v3f32( -half3 test_max_half3(half3 p0, half3 p1) { return max(p0, p1); } -// NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) <4 x half> @_Z14test_max_half4 -// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.maxnum.v4f16 -// NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z14test_max_half4 -// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.maxnum.v4f32( -half4 test_max_half4(half4 p0, half4 p1) { return max(p0, p1); } - -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z14test_max_float -// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.maxnum.f32( -float test_max_float(float p0, float p1) { return max(p0, p1); } -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> @_Z15test_max_float2 -// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.maxnum.v2f32 -float2 test_max_float2(float2 p0, float2 p1) { return max(p0, p1); } -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> @_Z15test_max_float3 -// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.maxnum.v3f32 -float3 test_max_float3(float3 p0, float3 p1) { return max(p0, p1); } -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> 
@_Z15test_max_float4 -// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.maxnum.v4f32 -float4 test_max_float4(float4 p0, float4 p1) { return max(p0, p1); } - -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) double @_Z15test_max_double -// CHECK: call reassoc nnan ninf nsz arcp afn double @llvm.maxnum.f64( -double test_max_double(double p0, double p1) { return max(p0, p1); } -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x double> @_Z16test_max_double2 -// CHECK: call reassoc nnan ninf nsz arcp afn <2 x double> @llvm.maxnum.v2f64 -double2 test_max_double2(double2 p0, double2 p1) { return max(p0, p1); } -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x double> @_Z16test_max_double3 -// CHECK: call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.maxnum.v3f64 -double3 test_max_double3(double3 p0, double3 p1) { return max(p0, p1); } -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x double> @_Z16test_max_double4 -// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.maxnum.v4f64 -double4 test_max_double4(double4 p0, double4 p1) { return max(p0, p1); } diff --git a/clang/test/CodeGenHLSL/builtins/min-overloads.hlsl b/clang/test/CodeGenHLSL/builtins/min-overloads.hlsl index ee3455a07c8e1..bbbcc42181072 100644 --- a/clang/test/CodeGenHLSL/builtins/min-overloads.hlsl +++ b/clang/test/CodeGenHLSL/builtins/min-overloads.hlsl @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ // RUN: -fnative-half-type -fnative-int16-type -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,NATIVE_HALF // RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ -// RUN: -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF +// RUN: -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK #ifdef __HLSL_ENABLE_16_BIT // NATIVE_HALF-LABEL: define hidden noundef <4 x i16> {{.*}}test_min_short4_mismatch @@ -46,36 
+46,3 @@ int64_t4 test_min_long4_mismatch(int64_t4 p0, int64_t p1) { return min(p0, p1); // CHECK: [[MIN:%.*]] = call noundef <4 x i64> @llvm.umin.v4i64(<4 x i64> %{{.*}}, <4 x i64> [[CONV1]]) // CHECK: ret <4 x i64> [[MIN]] uint64_t4 test_min_ulong4_mismatch(uint64_t4 p0, uint64_t p1) { return min(p0, p1); } - -// NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) <4 x half> {{.*}}test_min_half4_mismatch -// NATIVE_HALF: [[CONV0:%.*]] = insertelement <4 x half> poison, half %{{.*}}, i64 0 -// NATIVE_HALF: [[CONV1:%.*]] = shufflevector <4 x half> [[CONV0]], <4 x half> poison, <4 x i32> zeroinitializer -// NATIVE_HALF: [[MIN:%.*]] = call reassoc nnan ninf nsz arcp afn noundef nofpclass(nan inf) <4 x half> @llvm.minnum.v4f16(<4 x half> %{{.*}}, <4 x half> [[CONV1]]) -// NATIVE_HALF: ret <4 x half> [[MIN]] -// NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_min_half4_mismatch -// NO_HALF: [[CONV0:%.*]] = insertelement <4 x float> poison, float %{{.*}}, i64 0 -// NO_HALF: [[CONV1:%.*]] = shufflevector <4 x float> [[CONV0]], <4 x float> poison, <4 x i32> zeroinitializer -// NO_HALF: [[MIN:%.*]] = call reassoc nnan ninf nsz arcp afn noundef nofpclass(nan inf) <4 x float> @llvm.minnum.v4f32(<4 x float> %{{.*}}, <4 x float> [[CONV1]]) -// NO_HALF: ret <4 x float> [[MIN]] -half4 test_min_half4_mismatch(half4 p0, half p1) { return min(p0, p1); } - -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_min_float4_mismatch -// CHECK: [[CONV0:%.*]] = insertelement <4 x float> poison, float %{{.*}}, i64 0 -// CHECK: [[CONV1:%.*]] = shufflevector <4 x float> [[CONV0]], <4 x float> poison, <4 x i32> zeroinitializer -// CHECK: [[MIN:%.*]] = call reassoc nnan ninf nsz arcp afn noundef nofpclass(nan inf) <4 x float> @llvm.minnum.v4f32(<4 x float> %{{.*}}, <4 x float> [[CONV1]]) -// CHECK: ret <4 x float> [[MIN]] -float4 test_min_float4_mismatch(float4 p0, float p1) { return min(p0, p1); } - -// CHECK-LABEL: define 
hidden noundef nofpclass(nan inf) <4 x double> {{.*}}test_min_double4_mismatch -// CHECK: [[CONV0:%.*]] = insertelement <4 x double> poison, double %{{.*}}, i64 0 -// CHECK: [[CONV1:%.*]] = shufflevector <4 x double> [[CONV0]], <4 x double> poison, <4 x i32> zeroinitializer -// CHECK: [[MIN:%.*]] = call reassoc nnan ninf nsz arcp afn noundef nofpclass(nan inf) <4 x double> @llvm.minnum.v4f64(<4 x double> %{{.*}}, <4 x double> [[CONV1]]) -// CHECK: ret <4 x double> [[MIN]] -double4 test_min_double4_mismatch(double4 p0, double p1) { return min(p0, p1); } - -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x double> {{.*}}test_min_double4_mismatch2 -// CHECK: [[CONV0:%.*]] = insertelement <4 x double> poison, double %{{.*}}, i64 0 -// CHECK: [[CONV1:%.*]] = shufflevector <4 x double> [[CONV0]], <4 x double> poison, <4 x i32> zeroinitializer -// CHECK: [[MIN:%.*]] = call reassoc nnan ninf nsz arcp afn noundef nofpclass(nan inf) <4 x double> @llvm.minnum.v4f64(<4 x double> [[CONV1]], <4 x double> %{{.*}}) -// CHECK: ret <4 x double> [[MIN]] -double4 test_min_double4_mismatch2(double4 p0, double p1) { return min(p1, p0); } diff --git a/clang/test/CodeGenHLSL/builtins/min.hlsl b/clang/test/CodeGenHLSL/builtins/min.hlsl index 44d2063229cdb..fae7888c6cce3 100644 --- a/clang/test/CodeGenHLSL/builtins/min.hlsl +++ b/clang/test/CodeGenHLSL/builtins/min.hlsl @@ -3,7 +3,7 @@ // RUN: FileCheck %s --check-prefixes=CHECK,NATIVE_HALF // RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library %s \ // RUN: -emit-llvm -disable-llvm-passes -o - | \ -// RUN: FileCheck %s --check-prefixes=CHECK,NO_HALF +// RUN: FileCheck %s --check-prefixes=CHECK #ifdef __HLSL_ENABLE_16_BIT // NATIVE_HALF-LABEL: define hidden noundef i16 @_Z14test_min_short @@ -84,53 +84,3 @@ uint64_t3 test_min_ulong3(uint64_t3 p0, uint64_t3 p1) { return min(p0, p1); } // CHECK-LABEL: define hidden noundef <4 x i64> @_Z15test_min_ulong4 // CHECK: call <4 x i64> @llvm.umin.v4i64 uint64_t4 
test_min_ulong4(uint64_t4 p0, uint64_t4 p1) { return min(p0, p1); } - -// NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) half @_Z13test_min_half -// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn half @llvm.minnum.f16( -// NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) float @_Z13test_min_half -// NO_HALF: call reassoc nnan ninf nsz arcp afn float @llvm.minnum.f32( -half test_min_half(half p0, half p1) { return min(p0, p1); } -// NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) <2 x half> @_Z14test_min_half2 -// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <2 x half> @llvm.minnum.v2f16 -// NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> @_Z14test_min_half2 -// NO_HALF: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.minnum.v2f32( -half2 test_min_half2(half2 p0, half2 p1) { return min(p0, p1); } -// NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) <3 x half> @_Z14test_min_half3 -// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <3 x half> @llvm.minnum.v3f16 -// NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> @_Z14test_min_half3 -// NO_HALF: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.minnum.v3f32( -half3 test_min_half3(half3 p0, half3 p1) { return min(p0, p1); } -// NATIVE_HALF-LABEL: define hidden noundef nofpclass(nan inf) <4 x half> @_Z14test_min_half4 -// NATIVE_HALF: call reassoc nnan ninf nsz arcp afn <4 x half> @llvm.minnum.v4f16 -// NO_HALF-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z14test_min_half4 -// NO_HALF: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.minnum.v4f32( -half4 test_min_half4(half4 p0, half4 p1) { return min(p0, p1); } - -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) float @_Z14test_min_float -// CHECK: call reassoc nnan ninf nsz arcp afn float @llvm.minnum.f32( -float test_min_float(float p0, float p1) { return min(p0, p1); } -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> 
@_Z15test_min_float2 -// CHECK: call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.minnum.v2f32 -float2 test_min_float2(float2 p0, float2 p1) { return min(p0, p1); } -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> @_Z15test_min_float3 -// CHECK: call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.minnum.v3f32 -float3 test_min_float3(float3 p0, float3 p1) { return min(p0, p1); } -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> @_Z15test_min_float4 -// CHECK: call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.minnum.v4f32 -float4 test_min_float4(float4 p0, float4 p1) { return min(p0, p1); } - -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) double @_Z15test_min_double -// CHECK: call reassoc nnan ninf nsz arcp afn double @llvm.minnum.f64( -double test_min_double(double p0, double p1) { return min(p0, p1); } -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x double> @_Z16test_min_double2 -// CHECK: call reassoc nnan ninf nsz arcp afn <2 x double> @llvm.minnum.v2f64 -double2 test_min_double2(double2 p0, double2 p1) { return min(p0, p1); } -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x double> @_Z16test_min_double3 -// CHECK: call reassoc nnan ninf nsz arcp afn <3 x double> @llvm.minnum.v3f64 -double3 test_min_double3(double3 p0, double3 p1) { return min(p0, p1); } -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x double> @_Z16test_min_double4 -// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.minnum.v4f64 -double4 test_min_double4(double4 p0, double4 p1) { return min(p0, p1); } -// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x double> {{.*}}test_min_double4_mismatch -// CHECK: call reassoc nnan ninf nsz arcp afn <4 x double> @llvm.minnum.v4f64 -double4 test_min_double4_mismatch(double4 p0, double p1) { return min(p0, p1); } diff --git a/clang/test/SemaHLSL/BuiltIns/vec-scalar-compat-overload-warnings.hlsl 
b/clang/test/SemaHLSL/BuiltIns/vec-scalar-compat-overload-warnings.hlsl index 9e942784041fa..f3b64d440c938 100644 --- a/clang/test/SemaHLSL/BuiltIns/vec-scalar-compat-overload-warnings.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/vec-scalar-compat-overload-warnings.hlsl @@ -19,23 +19,3 @@ float3 lerp_test(float3 p0, float3 p1, float p2) { // CHECK: warning: 'lerp<float, 3U>' is deprecated: In 202x mismatched vector/scalar lowering for lerp is deprecated. Explicitly cast parameters. return lerp(p0, p1, p2); } - -float2 max_test1(float2 p0, float p1) { - // CHECK: warning: 'max<float, 2U>' is deprecated: In 202x mismatched vector/scalar lowering for max is deprecated. Explicitly cast parameters. - return max(p0, p1); -} - -float3 max_test2(float p0, float3 p1) { - // CHECK: warning: 'max<float, 3U>' is deprecated: In 202x mismatched vector/scalar lowering for max is deprecated. Explicitly cast parameters. - return max(p0, p1); -} - -float2 min_test1(float2 p0, float p1) { - // CHECK: warning: 'min<float, 2U>' is deprecated: In 202x mismatched vector/scalar lowering for min is deprecated. Explicitly cast parameters. - return min(p0, p1); -} - -float3 min_test2(float p0, float3 p1) { - // CHECK: warning: 'min<float, 3U>' is deprecated: In 202x mismatched vector/scalar lowering for min is deprecated. Explicitly cast parameters. 
- return min(p0, p1); -} >From d6cfe7a15a89d6cff326dd763e52ca9d5772e631 Mon Sep 17 00:00:00 2001 From: YunQiang Su <[email protected]> Date: Thu, 12 Feb 2026 11:38:31 +0800 Subject: [PATCH 4/4] Support vp_minimumnum --- llvm/docs/LangRef.rst | 100 ++++++++++++++++++ llvm/include/llvm/IR/Intrinsics.td | 10 ++ llvm/include/llvm/IR/VPIntrinsics.def | 14 +++ llvm/lib/CodeGen/ExpandVectorPredication.cpp | 2 + .../SelectionDAG/LegalizeVectorTypes.cpp | 2 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 14 +-- 6 files changed, 135 insertions(+), 7 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 28edd439b6900..fcdb7914bc52d 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -23404,6 +23404,106 @@ Examples: %t = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b, <4 x i1> %mask, i32 %evl) %also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison +.. _int_vp_minimumnum: + +'``llvm.vp.minimumnum.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. + +:: + + declare <16 x float> @llvm.vp.minimumnum.v16f32 (<16 x float> <left_op>, <16 x float> <right_op>, <16 x i1> <mask>, i32 <vector_length>) + declare <vscale x 4 x float> @llvm.vp.minimumnum.nxv4f32 (<vscale x 4 x float> <left_op>, <vscale x 4 x float> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>) + declare <256 x double> @llvm.vp.minimumnum.v256f64 (<256 x double> <left_op>, <256 x double> <right_op>, <256 x i1> <mask>, i32 <vector_length>) + +Overview: +""""""""" + +Predicated floating-point minimumNumber of two vectors of floating-point values, +not propagating NaNs (even sNaN) and treating -0.0 as less than +0.0. + +Arguments: +"""""""""" + +The first two arguments and the result have the same vector of floating-point type. The +third argument is the vector mask and has the same number of elements as the +result vector type. 
The fourth argument is the explicit vector length of the +operation. + +Semantics: +"""""""""" + +The '``llvm.vp.minimumnum``' intrinsic performs floating-point minimumNumber (:ref:`minimumnum <i_minimumnum>`) +of the first and second vector arguments on each enabled lane, the result being +NaN only if both arguments are NaNs. -0.0 is considered to be less than +0.0 for this +intrinsic. The result on disabled lanes is a :ref:`poison value <poisonvalues>`. +The operation is performed in the default floating-point environment. + +Examples: +""""""""" + +.. code-block:: llvm + + %r = call <4 x float> @llvm.vp.minimumnum.v4f32(<4 x float> %a, <4 x float> %b, <4 x i1> %mask, i32 %evl) + ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r + + %t = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> %a, <4 x float> %b) + %also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison + + +.. _int_vp_maximumnum: + +'``llvm.vp.maximumnum.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. + +:: + + declare <16 x float> @llvm.vp.maximumnum.v16f32 (<16 x float> <left_op>, <16 x float> <right_op>, <16 x i1> <mask>, i32 <vector_length>) + declare <vscale x 4 x float> @llvm.vp.maximumnum.nxv4f32 (<vscale x 4 x float> <left_op>, <vscale x 4 x float> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>) + declare <256 x double> @llvm.vp.maximumnum.v256f64 (<256 x double> <left_op>, <256 x double> <right_op>, <256 x i1> <mask>, i32 <vector_length>) + +Overview: +""""""""" + +Predicated floating-point maximumNumber of two vectors of floating-point values, +not propagating NaNs (even sNaN) and treating -0.0 as less than +0.0. + +Arguments: +"""""""""" + +The first two arguments and the result have the same vector of floating-point type. The +third argument is the vector mask and has the same number of elements as the +result vector type. The fourth argument is the explicit vector length of the +operation.
+ +Semantics: +"""""""""" + +The '``llvm.vp.maximumnum``' intrinsic performs floating-point maximumNumber (:ref:`maximumnum <i_maximumnum>`) +of the first and second vector arguments on each enabled lane, the result being +NaN only if both arguments are NaNs. -0.0 is considered to be less than +0.0 for this +intrinsic. The result on disabled lanes is a :ref:`poison value <poisonvalues>`. +The operation is performed in the default floating-point environment. + +Examples: +""""""""" + +.. code-block:: llvm + + %r = call <4 x float> @llvm.vp.maximumnum.v4f32(<4 x float> %a, <4 x float> %b, <4 x i1> %mask, i32 %evl) + ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r + + %t = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %a, <4 x float> %b) + %also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> poison + + .. _int_vp_fadd: diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 46e7b4b5c9491..e41d43970f030 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -2275,6 +2275,16 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in { LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; + def int_vp_minimumnum : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_maximumnum : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; def int_vp_copysign : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def index 0b0c744487b92..5fec07b4aa976 100644 --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -405,6 +405,20 @@ VP_PROPERTY_FUNCTIONAL_SDOPC(FMAXIMUM)
VP_PROPERTY_FUNCTIONAL_INTRINSIC(maximum) END_REGISTER_VP(vp_maximum, VP_FMAXIMUM) +// llvm.vp.minimumnum(x,y,mask,vlen) +BEGIN_REGISTER_VP(vp_minimumnum, 2, 3, VP_FMINIMUMNUM, -1) +VP_PROPERTY_BINARYOP +VP_PROPERTY_FUNCTIONAL_SDOPC(FMINIMUMNUM) +VP_PROPERTY_FUNCTIONAL_INTRINSIC(minimumnum) +END_REGISTER_VP(vp_minimumnum, VP_FMINIMUMNUM) + +// llvm.vp.maximumnum(x,y,mask,vlen) +BEGIN_REGISTER_VP(vp_maximumnum, 2, 3, VP_FMAXIMUMNUM, -1) +VP_PROPERTY_BINARYOP +VP_PROPERTY_FUNCTIONAL_SDOPC(FMAXIMUMNUM) +VP_PROPERTY_FUNCTIONAL_INTRINSIC(maximumnum) +END_REGISTER_VP(vp_maximumnum, VP_FMAXIMUMNUM) + // llvm.vp.ceil(x,mask,vlen) BEGIN_REGISTER_VP(vp_ceil, 1, 2, VP_FCEIL, -1) VP_PROPERTY_FUNCTIONAL_INTRINSIC(ceil) diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp index 17fae8f6c8185..0a117eee71fb2 100644 --- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp +++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp @@ -613,6 +613,8 @@ bool CachingVPExpander::expandPredication(VPIntrinsic &VPI) { case Intrinsic::vp_minnum: case Intrinsic::vp_maximum: case Intrinsic::vp_minimum: + case Intrinsic::vp_maximumnum: + case Intrinsic::vp_minimumnum: case Intrinsic::vp_fma: case Intrinsic::vp_fmuladd: return expandPredicationToFPCall(Builder, VPI, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 1fd5166ec148a..34f5ae8e71276 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1415,7 +1415,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMAXIMUM: case ISD::VP_FMAXIMUM: case ISD::FMINIMUMNUM: + case ISD::VP_FMINIMUMNUM: case ISD::FMAXIMUMNUM: + case ISD::VP_FMAXIMUMNUM: case ISD::SDIV: case ISD::VP_SDIV: case ISD::UDIV: case ISD::VP_UDIV: case ISD::FDIV: case ISD::VP_FDIV: diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 9b88bc5c39ce4..09f894b4bd720 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -882,7 +882,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP, ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND, - ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM, + ISD::VP_SQRT, ISD::VP_FMINIMUMNUM, ISD::VP_FMAXIMUMNUM, ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS, @@ -1203,8 +1203,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT, - ISD::VP_FMINNUM, - ISD::VP_FMAXNUM, ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND, @@ -1215,6 +1213,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_SETCC, ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, + ISD::VP_FMINIMUMNUM, + ISD::VP_FMAXIMUMNUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM}; @@ -7637,11 +7637,11 @@ static unsigned getRISCVVLOp(SDValue Op) { return RISCVISD::VFCVT_RTZ_XU_F_VL; case ISD::FMINNUM: case ISD::FMINIMUMNUM: - case ISD::VP_FMINNUM: + case ISD::VP_FMINIMUMNUM: return RISCVISD::VFMIN_VL; case ISD::FMAXNUM: case ISD::FMAXIMUMNUM: - case ISD::VP_FMAXNUM: + case ISD::VP_FMAXIMUMNUM: return RISCVISD::VFMAX_VL; case ISD::LRINT: case ISD::VP_LRINT: @@ -8951,8 +8951,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::VP_FABS: case ISD::VP_SQRT: case ISD::VP_FMA: - case ISD::VP_FMINNUM: - case ISD::VP_FMAXNUM: + case ISD::VP_FMINIMUMNUM: + case ISD::VP_FMAXIMUMNUM: case ISD::VP_FCOPYSIGN: if (isPromotedOpNeedingSplit(Op, Subtarget)) return SplitVPOp(Op, DAG); _______________________________________________ cfe-commits mailing list [email protected] 
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
