[PATCH] D147497: [AArch64] Use fneg instead of fsub -0.0, X Cin IR expansion of __builtin_neon_vfmsh_f16.

2023-04-04 Thread Craig Topper via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG0109f8d1e3bf: [AArch64] Use fneg instead of fsub -0.0, X Cin 
IR expansion of… (authored by craig.topper).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D147497/new/

https://reviews.llvm.org/D147497

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
  clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c


Index: clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
===
--- clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
+++ clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
@@ -652,7 +652,7 @@
 }
 
 // CHECK-LABEL: test_vfmsh_f16
-// CHECK:  [[SUB:%.*]] = fsub half 0xH8000, %b
+// CHECK:  [[SUB:%.*]] = fneg half %b
 // CHECK:  [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half 
%a)
 // CHECK:  ret half [[ADD]]
 float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
Index: clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
===
--- clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
+++ clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
@@ -290,8 +290,7 @@
 }
 
 // COMMON-LABEL: test_vfmsh_f16
-// UNCONSTRAINED:  [[SUB:%.*]] = fsub half 0xH8000, %b
-// CONSTRAINED:[[SUB:%.*]] = call half 
@llvm.experimental.constrained.fsub.f16(half 0xH8000, half %b, metadata 
!"round.tonearest", metadata !"fpexcept.strict")
+// COMMONIR:  [[SUB:%.*]] = fneg half %b
 // UNCONSTRAINED:  [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half 
%c, half %a)
 // CONSTRAINED:[[ADD:%.*]] = call half 
@llvm.experimental.constrained.fma.f16(half [[SUB]], half %c, half %a, metadata 
!"round.tonearest", metadata !"fpexcept.strict")
 // COMMONIR:   ret half [[ADD]]
Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -10965,14 +10965,12 @@
 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
   case NEON::BI__builtin_neon_vfmsh_f16: {
-// FIXME: This should be an fneg instruction:
-Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
-Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), 
"vsubh");
+Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
 
 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
 return emitCallMaybeConstrainedFPBuiltin(
 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
-{Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
+{Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
   }
   case NEON::BI__builtin_neon_vaddd_s64:
   case NEON::BI__builtin_neon_vaddd_u64:


Index: clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
===
--- clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
+++ clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
@@ -652,7 +652,7 @@
 }
 
 // CHECK-LABEL: test_vfmsh_f16
-// CHECK:  [[SUB:%.*]] = fsub half 0xH8000, %b
+// CHECK:  [[SUB:%.*]] = fneg half %b
 // CHECK:  [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a)
 // CHECK:  ret half [[ADD]]
 float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
Index: clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
===
--- clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
+++ clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
@@ -290,8 +290,7 @@
 }
 
 // COMMON-LABEL: test_vfmsh_f16
-// UNCONSTRAINED:  [[SUB:%.*]] = fsub half 0xH8000, %b
-// CONSTRAINED:[[SUB:%.*]] = call half @llvm.experimental.constrained.fsub.f16(half 0xH8000, half %b, metadata !"round.tonearest", metadata !"fpexcept.strict")
+// COMMONIR:  [[SUB:%.*]] = fneg half %b
 // UNCONSTRAINED:  [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a)
 // CONSTRAINED:[[ADD:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half %c, half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
 // COMMONIR:   ret half [[ADD]]
Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -10965,14 +10965,12 @@
 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
   case NEON::BI__builtin_neon_vfmsh_f16: {
-// FIXME: This should be 

[PATCH] D147497: [AArch64] Use fneg instead of fsub -0.0, X Cin IR expansion of __builtin_neon_vfmsh_f16.

2023-04-04 Thread Dave Green via Phabricator via cfe-commits
dmgreen accepted this revision.
dmgreen added a comment.

Sounds OK to me


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D147497/new/

https://reviews.llvm.org/D147497

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D147497: [AArch64] Use fneg instead of fsub -0.0, X Cin IR expansion of __builtin_neon_vfmsh_f16.

2023-04-04 Thread Sjoerd Meijer via Phabricator via cfe-commits
SjoerdMeijer accepted this revision.
SjoerdMeijer added a comment.
This revision is now accepted and ready to land.

LGTM


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D147497/new/

https://reviews.llvm.org/D147497

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D147497: [AArch64] Use fneg instead of fsub -0.0, X Cin IR expansion of __builtin_neon_vfmsh_f16.

2023-04-03 Thread Craig Topper via Phabricator via cfe-commits
craig.topper created this revision.
craig.topper added reviewers: efriedma, dmgreen, SjoerdMeijer, john.brawn.
Herald added a subscriber: kristof.beyls.
Herald added a project: All.
craig.topper requested review of this revision.
Herald added a project: clang.

Addresses the FIXME and removes the only in tree use of
llvm::ConstantFP::getZeroValueForNegation for an FP type.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D147497

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
  clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c


Index: clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
===
--- clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
+++ clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
@@ -652,7 +652,7 @@
 }
 
 // CHECK-LABEL: test_vfmsh_f16
-// CHECK:  [[SUB:%.*]] = fsub half 0xH8000, %b
+// CHECK:  [[SUB:%.*]] = fneg half %b
 // CHECK:  [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half 
%a)
 // CHECK:  ret half [[ADD]]
 float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
Index: clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
===
--- clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
+++ clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
@@ -290,8 +290,7 @@
 }
 
 // COMMON-LABEL: test_vfmsh_f16
-// UNCONSTRAINED:  [[SUB:%.*]] = fsub half 0xH8000, %b
-// CONSTRAINED:[[SUB:%.*]] = call half 
@llvm.experimental.constrained.fsub.f16(half 0xH8000, half %b, metadata 
!"round.tonearest", metadata !"fpexcept.strict")
+// COMMONIR:  [[SUB:%.*]] = fneg half %b
 // UNCONSTRAINED:  [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half 
%c, half %a)
 // CONSTRAINED:[[ADD:%.*]] = call half 
@llvm.experimental.constrained.fma.f16(half [[SUB]], half %c, half %a, metadata 
!"round.tonearest", metadata !"fpexcept.strict")
 // COMMONIR:   ret half [[ADD]]
Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -10965,14 +10965,12 @@
 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
   case NEON::BI__builtin_neon_vfmsh_f16: {
-// FIXME: This should be an fneg instruction:
-Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
-Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), 
"vsubh");
+Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");
 
 // NEON intrinsic puts accumulator first, unlike the LLVM fma.
 return emitCallMaybeConstrainedFPBuiltin(
 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
-{Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
+{Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});
   }
   case NEON::BI__builtin_neon_vaddd_s64:
   case NEON::BI__builtin_neon_vaddd_u64:


Index: clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
===
--- clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
+++ clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
@@ -652,7 +652,7 @@
 }
 
 // CHECK-LABEL: test_vfmsh_f16
-// CHECK:  [[SUB:%.*]] = fsub half 0xH8000, %b
+// CHECK:  [[SUB:%.*]] = fneg half %b
 // CHECK:  [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a)
 // CHECK:  ret half [[ADD]]
 float16_t test_vfmsh_f16(float16_t a, float16_t b, float16_t c) {
Index: clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
===
--- clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
+++ clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics-constrained.c
@@ -290,8 +290,7 @@
 }
 
 // COMMON-LABEL: test_vfmsh_f16
-// UNCONSTRAINED:  [[SUB:%.*]] = fsub half 0xH8000, %b
-// CONSTRAINED:[[SUB:%.*]] = call half @llvm.experimental.constrained.fsub.f16(half 0xH8000, half %b, metadata !"round.tonearest", metadata !"fpexcept.strict")
+// COMMONIR:  [[SUB:%.*]] = fneg half %b
 // UNCONSTRAINED:  [[ADD:%.*]] = call half @llvm.fma.f16(half [[SUB]], half %c, half %a)
 // CONSTRAINED:[[ADD:%.*]] = call half @llvm.experimental.constrained.fma.f16(half [[SUB]], half %c, half %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
 // COMMONIR:   ret half [[ADD]]
Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -10965,14 +10965,12 @@
 *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,
 {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
   case