arsenm updated this revision to Diff 245972.
arsenm edited the summary of this revision.
arsenm added a comment.

Rebase


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74500/new/

https://reviews.llvm.org/D74500

Files:
  clang/include/clang/Basic/CodeGenOptions.h
  clang/include/clang/Driver/ToolChain.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/CodeGenCUDA/flush-denormals.cu
  clang/test/CodeGenCUDA/propagate-metadata.cu
  clang/test/CodeGenOpenCL/amdgpu-features.cl
  clang/test/Driver/cuda-flush-denormals-to-zero.cu
  clang/test/Driver/denormal-fp-math.c
  clang/test/Driver/fp-model.c
  llvm/include/llvm/ADT/FloatingPointMode.h

Index: llvm/include/llvm/ADT/FloatingPointMode.h
===================================================================
--- llvm/include/llvm/ADT/FloatingPointMode.h
+++ llvm/include/llvm/ADT/FloatingPointMode.h
@@ -45,25 +45,25 @@
   /// floating-point instructions implicitly treat the input value as 0.
   DenormalModeKind Input = DenormalModeKind::Invalid;
 
-  DenormalMode() = default;
-  DenormalMode(DenormalModeKind Out, DenormalModeKind In) :
+  constexpr DenormalMode() = default;
+  constexpr DenormalMode(DenormalModeKind Out, DenormalModeKind In) :
     Output(Out), Input(In) {}
 
 
-  static DenormalMode getInvalid() {
+  static constexpr DenormalMode getInvalid() {
     return DenormalMode(DenormalModeKind::Invalid, DenormalModeKind::Invalid);
   }
 
-  static DenormalMode getIEEE() {
+  static constexpr DenormalMode getIEEE() {
     return DenormalMode(DenormalModeKind::IEEE, DenormalModeKind::IEEE);
   }
 
-  static DenormalMode getPreserveSign() {
+  static constexpr DenormalMode getPreserveSign() {
     return DenormalMode(DenormalModeKind::PreserveSign,
                         DenormalModeKind::PreserveSign);
   }
 
-  static DenormalMode getPositiveZero() {
+  static constexpr DenormalMode getPositiveZero() {
     return DenormalMode(DenormalModeKind::PositiveZero,
                         DenormalModeKind::PositiveZero);
   }
Index: clang/test/Driver/fp-model.c
===================================================================
--- clang/test/Driver/fp-model.c
+++ clang/test/Driver/fp-model.c
@@ -63,6 +63,10 @@
 // RUN:   | FileCheck --check-prefix=WARNf %s
 // WARNf: warning: overriding '-ffp-model=strict' option with '-Ofast' [-Woverriding-t-option]
 
+// RUN: %clang -### -ffp-model=strict -fdenormal-fp-math=preserve-sign,preserve-sign -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=WARN10 %s
+// WARN10: warning: overriding '-ffp-model=strict' option with '-fdenormal-fp-math=preserve-sign,preserve-sign' [-Woverriding-t-option]
+
 // RUN: %clang -### -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NOROUND %s
 // CHECK-NOROUND: "-cc1"
Index: clang/test/Driver/denormal-fp-math.c
===================================================================
--- clang/test/Driver/denormal-fp-math.c
+++ clang/test/Driver/denormal-fp-math.c
@@ -8,7 +8,8 @@
 // RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,ieee -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID2 %s
 // RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,foo -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID3 %s
 
-// CHECK-IEEE: -fdenormal-fp-math=ieee,ieee
+// TODO: ieee is the implied default, and the flag is not passed.
+// CHECK-IEEE: "-fdenormal-fp-math=ieee,ieee"
 // CHECK-PS: "-fdenormal-fp-math=preserve-sign,preserve-sign"
 // CHECK-PZ: "-fdenormal-fp-math=positive-zero,positive-zero"
 // CHECK-NO-UNSAFE-NOT: "-fdenormal-fp-math=ieee"
Index: clang/test/Driver/cuda-flush-denormals-to-zero.cu
===================================================================
--- clang/test/Driver/cuda-flush-denormals-to-zero.cu
+++ clang/test/Driver/cuda-flush-denormals-to-zero.cu
@@ -9,5 +9,9 @@
 
 // CPUFTZ-NOT: -fdenormal-fp-math
 
+// FTZ-NOT: -fdenormal-fp-math-f32=
 // FTZ: "-fdenormal-fp-math-f32=preserve-sign,preserve-sign"
-// NOFTZ: "-fdenormal-fp-math=ieee,ieee"
+
+// The default of ieee is omitted
+// NOFTZ-NOT: "-fdenormal-fp-math"
+// NOFTZ-NOT: "-fdenormal-fp-math-f32"
Index: clang/test/CodeGenOpenCL/amdgpu-features.cl
===================================================================
--- clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -14,13 +14,13 @@
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s
 
-// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
-// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
-// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,-fp32-denormals"
-// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
-// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
-// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,-fp32-denormals"
-// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp64-fp16-denormals,+gfx8-insts,+s-memrealtime,-fp32-denormals"
+// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime"
+// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime"
+// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+fp32-denormals,+fp64-fp16-denormals,+gfx8-insts,+s-memrealtime"
 // GFX700: "target-features"="+ci-insts,+flat-address-space,+fp64-fp16-denormals,-fp32-denormals"
 // GFX600: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
 // GFX601: "target-features"="+fp64-fp16-denormals,-fp32-denormals"
Index: clang/test/CodeGenCUDA/propagate-metadata.cu
===================================================================
--- clang/test/CodeGenCUDA/propagate-metadata.cu
+++ clang/test/CodeGenCUDA/propagate-metadata.cu
@@ -15,7 +15,7 @@
 // RUN:   %s -o %t.bc -triple nvptx-unknown-unknown
 
 // RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc -o - \
-// RUN:   -fno-trapping-math -fcuda-is-device -fdenormal-fp-math-f32=ieee -triple nvptx-unknown-unknown \
+// RUN:   -fno-trapping-math -fcuda-is-device -triple nvptx-unknown-unknown \
 // RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ --check-prefix=NOFAST
 
 // RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc \
@@ -60,8 +60,7 @@
 // CHECK-SAME: convergent
 // CHECK-SAME: norecurse
 
-// FTZ-NOT: "denormal-fp-math"
-
+// FTZ: "denormal-fp-math"="ieee,ieee"
 // FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign"
 // NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee"
 
@@ -76,7 +75,8 @@
 // CHECK-SAME: convergent
 // CHECK-NOT: norecurse
 
-// FTZ-NOT: "denormal-fp-math"
+// FTZ-SAME: "denormal-fp-math"="ieee,ieee"
+// NOFTZ-SAME: "denormal-fp-math"="ieee,ieee"
 
 // FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign"
 // NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee"
Index: clang/test/CodeGenCUDA/flush-denormals.cu
===================================================================
--- clang/test/CodeGenCUDA/flush-denormals.cu
+++ clang/test/CodeGenCUDA/flush-denormals.cu
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 -fcuda-is-device \
 // RUN:   -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
-// RUN:   FileCheck -check-prefix=DEFAULT %s
+// RUN:   FileCheck -check-prefix=NOFTZ %s
 
 // RUN: %clang_cc1 -fcuda-is-device -fdenormal-fp-math-f32=ieee \
 // RUN:   -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
@@ -10,10 +10,9 @@
 // RUN:   -triple nvptx-nvidia-cuda -emit-llvm -o - %s | \
 // RUN:   FileCheck -check-prefix=FTZ %s
 
-// FIXME: Unspecified should default to ieee
 // RUN: %clang_cc1 -fcuda-is-device -x hip \
 // RUN:   -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm -o - %s | \
-// RUN:   FileCheck -check-prefix=AMDFTZ %s
+// RUN:   FileCheck -check-prefix=AMDNOFTZ %s
 
 // RUN: %clang_cc1 -fcuda-is-device -x hip \
 // RUN:   -triple amdgcn-amd-amdhsa -target-cpu gfx900 -fdenormal-fp-math-f32=ieee -emit-llvm -o - %s | \
@@ -42,10 +41,6 @@
 // FTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign"
 // NOFTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="ieee,ieee"
 
-
-// FIXME: This should be removed
-// DEFAULT-NOT: "denormal-fp-math-f32"
-
 // AMDNOFTZ: attributes #0 = {{.*}}+fp32-denormals{{.*}}+fp64-fp16-denormals
 // AMDFTZ: attributes #0 = {{.*}}+fp64-fp16-denormals{{.*}}-fp32-denormals
 
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -2550,8 +2550,13 @@
       ReciprocalMath = false;
       SignedZeros = true;
       // -fno_fast_math restores default denormal and fpcontract handling
-      DenormalFPMath = DefaultDenormalFPMath;
       FPContract = "";
+      DenormalFPMath = DefaultDenormalFPMath;
+
+      // FIXME: The target may have picked a non-IEEE default mode here based on
+      // -cl-denorms-are-zero. Should the target consider -fp-model interaction?
+      DenormalFP32Math = DefaultDenormalFP32Math;
+
       StringRef Val = A->getValue();
       if (OFastEnabled && !Val.equals("fast")) {
           // Only -ffp-model=fast is compatible with OFast, ignore.
@@ -2728,7 +2733,9 @@
       FPExceptionBehavior = "strict";
       // -fno_unsafe_math_optimizations restores default denormal handling
       DenormalFPMath = DefaultDenormalFPMath;
-      DenormalFP32Math = DefaultDenormalFP32Math;
+
+      // The target may have opted to flush just f32 by default, so force IEEE.
+      DenormalFP32Math = llvm::DenormalMode::getIEEE();
       break;
 
     case options::OPT_Ofast:
@@ -2769,11 +2776,12 @@
     if (StrictFPModel) {
       // If -ffp-model=strict has been specified on command line but
       // subsequent options conflict then emit warning diagnostic.
-      // TODO: How should this interact with DenormalFP32Math?
       if (HonorINFs && HonorNaNs &&
         !AssociativeMath && !ReciprocalMath &&
         SignedZeros && TrappingMath && RoundingFPMath &&
-        (FPContract.equals("off") || FPContract.empty()))
+        (FPContract.equals("off") || FPContract.empty()) &&
+        DenormalFPMath == llvm::DenormalMode::getIEEE() &&
+        DenormalFP32Math == llvm::DenormalMode::getIEEE())
         // OK: Current Arg doesn't conflict with -ffp-model=strict
         ;
       else {
@@ -2827,7 +2835,8 @@
     CmdArgs.push_back(Args.MakeArgString(ArgStr.str()));
   }
 
-  if (DenormalFP32Math.isValid()) {
+  // Add f32 specific denormal mode flag if it's different.
+  if (DenormalFP32Math != DenormalFPMath) {
     llvm::SmallString<64> DenormFlag;
     llvm::raw_svector_ostream ArgStr(DenormFlag);
     ArgStr << "-fdenormal-fp-math-f32=" << DenormalFP32Math;
Index: clang/include/clang/Driver/ToolChain.h
===================================================================
--- clang/include/clang/Driver/ToolChain.h
+++ clang/include/clang/Driver/ToolChain.h
@@ -623,8 +623,7 @@
       const llvm::opt::ArgList &DriverArgs,
       Action::OffloadKind DeviceOffloadKind,
       const llvm::fltSemantics *FPType = nullptr) const {
-    // FIXME: This should be IEEE when default handling is fixed.
-    return llvm::DenormalMode::getInvalid();
+    return llvm::DenormalMode::getIEEE();
   }
 };
 
Index: clang/include/clang/Basic/CodeGenOptions.h
===================================================================
--- clang/include/clang/Basic/CodeGenOptions.h
+++ clang/include/clang/Basic/CodeGenOptions.h
@@ -164,10 +164,10 @@
   std::string FloatABI;
 
   /// The floating-point denormal mode to use.
-  llvm::DenormalMode FPDenormalMode;
+  llvm::DenormalMode FPDenormalMode = llvm::DenormalMode::getIEEE();
 
-  /// The floating-point subnormal mode to use, for float.
-  llvm::DenormalMode FP32DenormalMode;
+  /// The floating-point denormal mode to use, for float.
+  llvm::DenormalMode FP32DenormalMode = llvm::DenormalMode::getIEEE();
 
   /// The float precision limit to use, if non-empty.
   std::string LimitFloatPrecision;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to