Re: [PATCH] D24512: AMDGPU: Fix target options fp32/64-denormals

2016-09-13 Thread Yaxun Liu via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL281357: AMDGPU: Fix target options fp32/64-denormals 
(authored by yaxunl).

Changed prior to commit:
  https://reviews.llvm.org/D24512?vs=71183=71201#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D24512

Files:
  cfe/trunk/lib/Basic/Targets.cpp
  cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl

Index: cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl
===
--- cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl
+++ cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl
@@ -1,13 +1,19 @@
 // RUN: %clang_cc1 -S -cl-denorms-are-zero -o - %s 2>&1
 // RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa 
-target-cpu fiji %s | FileCheck %s
 // RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu fiji %s 
| FileCheck %s --check-prefix=CHECK-DENORM
+// RUN: %clang_cc1 -emit-llvm -target-feature +fp32-denormals -target-feature 
-fp64-denormals -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu 
fiji %s | FileCheck --check-prefix=CHECK-FEATURE %s
 
 // For non-amdgcn targets, this test just checks that the -cl-denorms-are-zero 
argument is accepted
 // by clang.  This option is currently a no-op, which is allowed by the
 // OpenCL specification.
 
+// For amdgcn target cpu fiji, fp32 should be flushed since fiji does not 
support fp32 denormals, unless +fp32-denormals is
+// explicitly set. amdgcn target always do not flush fp64 denormals.
+
 // CHECK-DENORM-LABEL: define void @f()
-// CHECK-DENORM: attributes #{{[0-9]*}} = {{{[^}]*}} 
"target-features"="{{[^"]*}}+fp32-denormals,+fp64-denormals{{[^"]*}}"
+// CHECK-DENORM: attributes #{{[0-9]*}} = {{{[^}]*}} 
"target-features"="{{[^"]*}}+fp64-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}"
 // CHECK-LABEL: define void @f()
-// CHECK-NOT: attributes #{{[0-9]*}} = {{{[^}]*}} 
"target-features"="{{[^"]*}}+fp32-denormals,+fp64-denormals{{[^"]*}}"
+// CHECK: attributes #{{[0-9]*}} = {{{[^}]*}} 
"target-features"="{{[^"]*}}+fp64-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}"
+// CHECK-FEATURE-LABEL: define void @f()
+// CHECK-FEATURE: attributes #{{[0-9]*}} = {{{[^}]*}} 
"target-features"="{{[^"]*}}+fp32-denormals,{{[^"]*}}-fp64-denormals{{[^"]*}}"
 void f() {}
Index: cfe/trunk/lib/Basic/Targets.cpp
===
--- cfe/trunk/lib/Basic/Targets.cpp
+++ cfe/trunk/lib/Basic/Targets.cpp
@@ -1965,7 +1965,7 @@
   bool hasFP64:1;
   bool hasFMAF:1;
   bool hasLDEXPF:1;
-  bool hasDenormSupport:1;
+  bool hasFullSpeedFP32Denorms:1;
 
   static bool isAMDGCN(const llvm::Triple ) {
 return TT.getArch() == llvm::Triple::amdgcn;
@@ -1978,14 +1978,12 @@
   hasFP64(false),
   hasFMAF(false),
   hasLDEXPF(false),
-  hasDenormSupport(false){
+  hasFullSpeedFP32Denorms(false){
 if (getTriple().getArch() == llvm::Triple::amdgcn) {
   hasFP64 = true;
   hasFMAF = true;
   hasLDEXPF = true;
 }
-if (Opts.CPU == "fiji")
-  hasDenormSupport = true;
 
 resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ?
 DataLayoutStringSI : DataLayoutStringR600);
@@ -2040,8 +2038,6 @@
 
   void adjustTargetOptions(const CodeGenOptions ,
TargetOptions ) const override {
-if (!hasDenormSupport)
-  return;
 bool hasFP32Denormals = false;
 bool hasFP64Denormals = false;
 for (auto  : TargetOpts.FeaturesAsWritten) {
@@ -2051,11 +2047,11 @@
 hasFP64Denormals = true;
 }
 if (!hasFP32Denormals)
-  TargetOpts.Features.push_back((Twine(CGOpts.FlushDenorm ? '-' : '+') +
- Twine("fp32-denormals")).str());
+  TargetOpts.Features.push_back((Twine(hasFullSpeedFP32Denorms &&
+  !CGOpts.FlushDenorm ? '+' : '-') + Twine("fp32-denormals")).str());
+// Always do not flush fp64 denorms.
 if (!hasFP64Denormals && hasFP64)
-  TargetOpts.Features.push_back((Twine(CGOpts.FlushDenorm ? '-' : '+') +
- Twine("fp64-denormals")).str());
+  TargetOpts.Features.push_back("+fp64-denormals");
   }
 
   ArrayRef getTargetBuiltins() const override {


Index: cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl
===
--- cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl
+++ cfe/trunk/test/CodeGenOpenCL/denorms-are-zero.cl
@@ -1,13 +1,19 @@
 // RUN: %clang_cc1 -S -cl-denorms-are-zero -o - %s 2>&1
 // RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s
 // RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s --check-prefix=CHECK-DENORM
+// RUN: %clang_cc1 -emit-llvm -target-feature +fp32-denormals -target-feature -fp64-denormals -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | 

Re: [PATCH] D24512: AMDGPU: Fix target options fp32/64-denormals

2016-09-13 Thread Yaxun Liu via cfe-commits
yaxunl added inline comments.


Comment at: lib/Basic/Targets.cpp:1962
@@ -1961,3 +1961,3 @@
   bool hasLDEXPF:1;
-  bool hasDenormSupport:1;
+  bool hasFP32DenormSupport:1;
 

tstellarAMD wrote:
> I think this name is a little confusing, because the hardware does support  
> fp32 denorms.  I would change this to something like hasFullSpeedFP32Denorms.
I will change that when committing. Thanks.


https://reviews.llvm.org/D24512



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D24512: AMDGPU: Fix target options fp32/64-denormals

2016-09-13 Thread Tom Stellard via cfe-commits
tstellarAMD accepted this revision.
tstellarAMD added a comment.
This revision is now accepted and ready to land.

Once small comment otherwise. LGTM.



Comment at: lib/Basic/Targets.cpp:1962
@@ -1961,3 +1961,3 @@
   bool hasLDEXPF:1;
-  bool hasDenormSupport:1;
+  bool hasFP32DenormSupport:1;
 

I think this name is a little confusing, because the hardware does support  
fp32 denorms.  I would change this to something like hasFullSpeedFP32Denorms.


https://reviews.llvm.org/D24512



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D24512: AMDGPU: Fix target options fp32/64-denormals

2016-09-13 Thread Yaxun Liu via cfe-commits
yaxunl created this revision.
yaxunl added reviewers: tstellarAMD, nhaustov, arsenm.
yaxunl added subscribers: cfe-commits, AMDGPU.
Herald added a subscriber: wdng.

Fix target options for fp32/64-denormals so that

  +fp64-denormals is set if fp64 is supported
  -fp32-denormals if fp32 denormals is not supported, or -cl-denorms-are-zero 
is set
  +fp32-denormals if fp32 denormals is supported and -cl-denorms-are-zero is 
not set


https://reviews.llvm.org/D24512

Files:
  lib/Basic/Targets.cpp
  test/CodeGenOpenCL/denorms-are-zero.cl

Index: test/CodeGenOpenCL/denorms-are-zero.cl
===
--- test/CodeGenOpenCL/denorms-are-zero.cl
+++ test/CodeGenOpenCL/denorms-are-zero.cl
@@ -1,13 +1,19 @@
 // RUN: %clang_cc1 -S -cl-denorms-are-zero -o - %s 2>&1
 // RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa 
-target-cpu fiji %s | FileCheck %s
 // RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu fiji %s 
| FileCheck %s --check-prefix=CHECK-DENORM
+// RUN: %clang_cc1 -emit-llvm -target-feature +fp32-denormals -target-feature 
-fp64-denormals -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu 
fiji %s | FileCheck --check-prefix=CHECK-FEATURE %s
 
 // For non-amdgcn targets, this test just checks that the -cl-denorms-are-zero 
argument is accepted
 // by clang.  This option is currently a no-op, which is allowed by the
 // OpenCL specification.
 
+// For amdgcn target cpu fiji, fp32 should be flushed since fiji does not 
support fp32 denormals, unless +fp32-denormals is
+// explicitly set. amdgcn target always do not flush fp64 denormals.
+
 // CHECK-DENORM-LABEL: define void @f()
-// CHECK-DENORM: attributes #{{[0-9]*}} = {{{[^}]*}} 
"target-features"="{{[^"]*}}+fp32-denormals,+fp64-denormals{{[^"]*}}"
+// CHECK-DENORM: attributes #{{[0-9]*}} = {{{[^}]*}} 
"target-features"="{{[^"]*}}+fp64-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}"
 // CHECK-LABEL: define void @f()
-// CHECK-NOT: attributes #{{[0-9]*}} = {{{[^}]*}} 
"target-features"="{{[^"]*}}+fp32-denormals,+fp64-denormals{{[^"]*}}"
+// CHECK: attributes #{{[0-9]*}} = {{{[^}]*}} 
"target-features"="{{[^"]*}}+fp64-denormals,{{[^"]*}}-fp32-denormals{{[^"]*}}"
+// CHECK-FEATURE-LABEL: define void @f()
+// CHECK-FEATURE: attributes #{{[0-9]*}} = {{{[^}]*}} 
"target-features"="{{[^"]*}}+fp32-denormals,{{[^"]*}}-fp64-denormals{{[^"]*}}"
 void f() {}
Index: lib/Basic/Targets.cpp
===
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -1959,7 +1959,7 @@
   bool hasFP64:1;
   bool hasFMAF:1;
   bool hasLDEXPF:1;
-  bool hasDenormSupport:1;
+  bool hasFP32DenormSupport:1;
 
   static bool isAMDGCN(const llvm::Triple ) {
 return TT.getArch() == llvm::Triple::amdgcn;
@@ -1972,14 +1972,12 @@
   hasFP64(false),
   hasFMAF(false),
   hasLDEXPF(false),
-  hasDenormSupport(false){
+  hasFP32DenormSupport(false){
 if (getTriple().getArch() == llvm::Triple::amdgcn) {
   hasFP64 = true;
   hasFMAF = true;
   hasLDEXPF = true;
 }
-if (Opts.CPU == "fiji")
-  hasDenormSupport = true;
 
 resetDataLayout(getTriple().getArch() == llvm::Triple::amdgcn ?
 DataLayoutStringSI : DataLayoutStringR600);
@@ -2034,8 +2032,6 @@
 
   void adjustTargetOptions(const CodeGenOptions ,
TargetOptions ) const override {
-if (!hasDenormSupport)
-  return;
 bool hasFP32Denormals = false;
 bool hasFP64Denormals = false;
 for (auto  : TargetOpts.FeaturesAsWritten) {
@@ -2045,11 +2041,11 @@
 hasFP64Denormals = true;
 }
 if (!hasFP32Denormals)
-  TargetOpts.Features.push_back((Twine(CGOpts.FlushDenorm ? '-' : '+') +
- Twine("fp32-denormals")).str());
+  TargetOpts.Features.push_back((Twine(hasFP32DenormSupport &&
+  !CGOpts.FlushDenorm ? '+' : '-') + Twine("fp32-denormals")).str());
+// Always do not flush fp64 denorms.
 if (!hasFP64Denormals && hasFP64)
-  TargetOpts.Features.push_back((Twine(CGOpts.FlushDenorm ? '-' : '+') +
- Twine("fp64-denormals")).str());
+  TargetOpts.Features.push_back("+fp64-denormals");
   }
 
   ArrayRef getTargetBuiltins() const override {


Index: test/CodeGenOpenCL/denorms-are-zero.cl
===
--- test/CodeGenOpenCL/denorms-are-zero.cl
+++ test/CodeGenOpenCL/denorms-are-zero.cl
@@ -1,13 +1,19 @@
 // RUN: %clang_cc1 -S -cl-denorms-are-zero -o - %s 2>&1
 // RUN: %clang_cc1 -emit-llvm -cl-denorms-are-zero -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s
 // RUN: %clang_cc1 -emit-llvm -o - -triple amdgcn--amdhsa -target-cpu fiji %s | FileCheck %s --check-prefix=CHECK-DENORM
+// RUN: %clang_cc1 -emit-llvm -target-feature +fp32-denormals -target-feature -fp64-denormals -cl-denorms-are-zero -o -