yaxunl updated this revision to Diff 338379.
yaxunl marked 3 inline comments as done.
yaxunl edited the summary of this revision.
yaxunl added a comment.

Revised by Matt's comments.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D77013/new/

https://reviews.llvm.org/D77013

Files:
  clang/include/clang/Basic/CodeGenOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/TargetInfo.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/test/CodeGenOpenCL/amdgpu-ieee.cl

Index: clang/test/CodeGenOpenCL/amdgpu-ieee.cl
===================================================================
--- /dev/null
+++ clang/test/CodeGenOpenCL/amdgpu-ieee.cl
@@ -0,0 +1,48 @@
+// REQUIRES: amdgpu-registered-target
+//
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \
+// RUN:   | FileCheck -check-prefixes=COMMON,ON %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \
+// RUN:   -mamdgpu-ieee | FileCheck -check-prefixes=COMMON,ON %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \
+// RUN:   -mno-amdgpu-ieee | FileCheck -check-prefixes=COMMON,OFF %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \
+// RUN:   -menable-no-nans | FileCheck -check-prefixes=COMMON,OFF %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \
+// RUN:   -menable-no-nans -mamdgpu-ieee | FileCheck -check-prefixes=COMMON,ON %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \
+// RUN:   -cl-fast-relaxed-math | FileCheck -check-prefixes=COMMON,OFF %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \
+// RUN:   -cl-fast-relaxed-math -mamdgpu-ieee \
+
+// Check AMDGCN ISA generation.
+
+// RUN: | FileCheck -check-prefixes=COMMON,ON %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \
+// RUN:   -mamdgpu-ieee \
+// RUN: | FileCheck -check-prefixes=ISA-ON %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O3 -S -o - %s \
+// RUN:   -mno-amdgpu-ieee \
+// RUN: | FileCheck -check-prefixes=ISA-OFF %s
+
+// COMMON: define{{.*}} amdgpu_kernel void @kern{{.*}} [[ATTRS1:#[0-9]+]]
+// ISA-ON: v_mul_f32_e64 v{{[0-9]+}}, 1.0, s{{[0-9]+}}
+// ISA-ON: v_mul_f32_e64 v{{[0-9]+}}, 1.0, s{{[0-9]+}}
+// ISA-ON: v_min_f32_e32
+// ISA-ON: ; IeeeMode: 1
+// ISA-OFF-NOT: v_mul_f32_e64 v{{[0-9]+}}, 1.0, s{{[0-9]+}}
+// ISA-OFF-NOT: v_mul_f32_e64 v{{[0-9]+}}, 1.0, s{{[0-9]+}}
+// ISA-OFF: v_min_f32_e32
+// ISA-OFF: ; IeeeMode: 0
+kernel void kern(global float *x, float y, float z) {
+  *x = __builtin_fmin(y, z);
+}
+
+// COMMON: define{{.*}}void @fun() [[ATTRS2:#[0-9]+]]
+void fun() {
+}
+
+// ON-NOT: attributes [[ATTRS1]] = {{.*}} "amdgpu-ieee"
+// OFF: attributes [[ATTRS1]] = {{.*}} "amdgpu-ieee"="false"
+// ON-NOT: attributes [[ATTRS2]] = {{.*}} "amdgpu-ieee"
+// OFF: attributes [[ATTRS2]] = {{.*}} "amdgpu-ieee"="false"
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -1944,6 +1944,13 @@
   else if (Args.hasArg(options::OPT_fno_finite_loops))
     Opts.FiniteLoops = CodeGenOptions::FiniteLoopsKind::Never;
 
+  // When NaN is not honored, floating point opcodes that support exception
+  // flag gathering does not need to quiet or propagate signaling NaN inputs
+  // per IEEE 754-2008. Note this only concerns about signaling NaN.
+  Opts.EmitIEEENaNCompliantInsts =
+      Args.hasFlag(options::OPT_mamdgpu_ieee, options::OPT_mno_amdgpu_ieee,
+                   !LangOptsRef.NoHonorNaNs);
+
   return Success && Diags.getNumErrors() == NumErrorsBefore;
 }
 
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -9166,6 +9166,9 @@
 
   if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
     F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
+
+  if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
+    F->addFnAttr("amdgpu-ieee", "false");
 }
 
 unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -3165,6 +3165,12 @@
                      Values<"command,reactor">,
                      HelpText<"Execution model (WebAssembly only)">;
 
+def mamdgpu_ieee : Flag<["-"], "mamdgpu-ieee">, Flags<[CC1Option]>,
+  Group<m_Group>, HelpText<"Floating point opcodes that support exception flag "
+   "gathering quiet and propagate signaling NaN inputs per IEEE 754-2008 (AMDGPU only)">;
+def mno_amdgpu_ieee : Flag<["-"], "mno-amdgpu-ieee">, Flags<[CC1Option]>,
+  Group<m_Group>;
+
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
   HelpText<"Specify code object ABI version. Defaults to 3. (AMDGPU only)">,
   MetaVarName<"<version>">, Values<"2,3,4">;
Index: clang/include/clang/Basic/CodeGenOptions.def
===================================================================
--- clang/include/clang/Basic/CodeGenOptions.def
+++ clang/include/clang/Basic/CodeGenOptions.def
@@ -425,6 +425,10 @@
 /// according to the field declaring type width.
 CODEGENOPT(AAPCSBitfieldWidth, 1, 1)
 
+/// Floating point opcodes that support exception flag gathering quiet and
+/// propagate signaling NaN inputs per IEEE 754-2008 (AMDGPU Only)
+CODEGENOPT(EmitIEEENaNCompliantInsts, 1, 1)
+
 #undef CODEGENOPT
 #undef ENUM_CODEGENOPT
 #undef VALUE_CODEGENOPT
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to