Hi,

The attached patch changes Clang so that for types bigger than float, instead 
of converting to __fp16 via the sequence "InTy -> float -> __fp16", we perform 
conversions in just one step. This avoids the double rounding which potentially 
changes results from a natural IEEE-754 operation.

There are potential problems, but I believe the benefits outweigh them:

  - It's a change in semantics. I believe it's compatible with the major 
standards though (OpenCL requires accesses go via a builtin; n1833 for C would 
*demand* this change, from my reading of it).
  - It means double -> __fp16 conversion will fail on x86 and v7 ARM CPUs for 
now. Specifically, we will generate a libcall which isn't actually widespread 
(or probably implemented anywhere). I think this is preferable to the 
status-quo of producing a possibly incorrect result though.

Longer term, I'd like to improve the codegen here to use real fpext/fptrunc 
operations and remove many of the special cases for half. Unfortunately the 
LLVM CodeGen isn't up to this change yet, so I've just extended the use of the 
@llvm.convert.to.fp16 intrinsic.

So, is it OK to change this?

Cheers.

Tim.

http://reviews.llvm.org/D4602

Files:
  lib/CodeGen/CGExprScalar.cpp
  test/CodeGen/fp16-ops.c

Index: lib/CodeGen/CGExprScalar.cpp
===================================================================
--- lib/CodeGen/CGExprScalar.cpp
+++ lib/CodeGen/CGExprScalar.cpp
@@ -773,8 +773,13 @@
                              DstTy);
 
   // Cast to half via float
-  if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType)
+  if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) 
{
+    if (SrcTy->isFloatingPointTy())
+      return Builder.CreateCall(
+          CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, SrcTy), Src);
+
     DstTy = CGF.FloatTy;
+  }
 
   if (isa<llvm::IntegerType>(SrcTy)) {
     bool InputSigned = SrcType->isSignedIntegerOrEnumerationType();
Index: test/CodeGen/fp16-ops.c
===================================================================
--- test/CodeGen/fp16-ops.c
+++ test/CodeGen/fp16-ops.c
@@ -5,6 +5,7 @@
 volatile cond_t test;
 volatile __fp16 h0 = 0.0, h1 = 1.0, h2;
 volatile float f0, f1, f2;
+volatile double d0;
 
 void foo(void) {
   // CHECK-LABEL: define void @foo()
@@ -52,7 +53,7 @@
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fmul float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h1 = h0 * (__fp16) -2.0;
+  h1 = h0 * (__fp16) -2.0f;
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fmul float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
@@ -71,7 +72,7 @@
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fdiv float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h1 = (h0 / (__fp16) -2.0);
+  h1 = (h0 / (__fp16) -2.0f);
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fdiv float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
@@ -109,7 +110,7 @@
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fsub float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h1 = ((__fp16)-2.0 - h0);
+  h1 = ((__fp16)-2.0f - h0);
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fsub float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
@@ -218,7 +219,7 @@
   // Check assignments (inc. compound)
   h0 = h1;
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h0 = (__fp16)-2.0;
+  h0 = (__fp16)-2.0f;
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
   h0 = f0;
 
@@ -231,7 +232,7 @@
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fadd
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h0 += (__fp16)1.0;
+  h0 += (__fp16)1.0f;
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fadd
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
@@ -282,3 +283,12 @@
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
   h0 /= f2;
 }
+
+void test_trunc() {
+  // CHECK: call i16 @llvm.convert.to.fp16.f64(
+  h0 = d0;
+
+  // CHECK: [[MID:%.*]] = fptrunc double {{%.*}} to float
+  // CHECK: call i16 @llvm.convert.to.fp16.f32(float [[MID]])
+  h0 = (float)d0;
+}
Index: lib/CodeGen/CGExprScalar.cpp
===================================================================
--- lib/CodeGen/CGExprScalar.cpp
+++ lib/CodeGen/CGExprScalar.cpp
@@ -773,8 +773,13 @@
                              DstTy);
 
   // Cast to half via float
-  if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType)
+  if (DstType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) {
+    if (SrcTy->isFloatingPointTy())
+      return Builder.CreateCall(
+          CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, SrcTy), Src);
+
     DstTy = CGF.FloatTy;
+  }
 
   if (isa<llvm::IntegerType>(SrcTy)) {
     bool InputSigned = SrcType->isSignedIntegerOrEnumerationType();
Index: test/CodeGen/fp16-ops.c
===================================================================
--- test/CodeGen/fp16-ops.c
+++ test/CodeGen/fp16-ops.c
@@ -5,6 +5,7 @@
 volatile cond_t test;
 volatile __fp16 h0 = 0.0, h1 = 1.0, h2;
 volatile float f0, f1, f2;
+volatile double d0;
 
 void foo(void) {
   // CHECK-LABEL: define void @foo()
@@ -52,7 +53,7 @@
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fmul float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h1 = h0 * (__fp16) -2.0;
+  h1 = h0 * (__fp16) -2.0f;
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fmul float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
@@ -71,7 +72,7 @@
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fdiv float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h1 = (h0 / (__fp16) -2.0);
+  h1 = (h0 / (__fp16) -2.0f);
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fdiv float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
@@ -109,7 +110,7 @@
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fsub float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h1 = ((__fp16)-2.0 - h0);
+  h1 = ((__fp16)-2.0f - h0);
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fsub float
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
@@ -218,7 +219,7 @@
   // Check assignments (inc. compound)
   h0 = h1;
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h0 = (__fp16)-2.0;
+  h0 = (__fp16)-2.0f;
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
   h0 = f0;
 
@@ -231,7 +232,7 @@
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fadd
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
-  h0 += (__fp16)1.0;
+  h0 += (__fp16)1.0f;
   // CHECK: call float @llvm.convert.from.fp16.f32(
   // CHECK: fadd
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
@@ -282,3 +283,12 @@
   // CHECK: call i16 @llvm.convert.to.fp16.f32(
   h0 /= f2;
 }
+
+void test_trunc() {
+  // CHECK: call i16 @llvm.convert.to.fp16.f64(
+  h0 = d0;
+
+  // CHECK: [[MID:%.*]] = fptrunc double {{%.*}} to float
+  // CHECK: call i16 @llvm.convert.to.fp16.f32(float [[MID]])
+  h0 = (float)d0;
+}
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to