dfukalov created this revision. dfukalov added reviewers: yaxunl, b-sumner, rampitec. Herald added subscribers: cfe-commits, Anastasia. Herald added a project: clang.
With the FAST default FP_CONTRACT mode' setting for `func(float a, float b, float c) { return a + b * c; }` FE generates pair `fmul contract` + `fadd contract` that are fused to an fma operation in BE. But OpenCL fuses these in FE. This approach seems more effective since avoids a probabilty that these instructions are not fused in BE. Default setting can be overridden with `#pragma STDC FP_CONTRACT` by a programmer. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D82650 Files: clang/lib/Frontend/CompilerInvocation.cpp clang/test/CodeGenHIP/fp-contract.hip clang/test/CodeGenHIP/lit.local.cfg Index: clang/test/CodeGenHIP/lit.local.cfg =================================================================== --- /dev/null +++ clang/test/CodeGenHIP/lit.local.cfg @@ -0,0 +1 @@ +config.suffixes = ['.cpp', '.hip'] Index: clang/test/CodeGenHIP/fp-contract.hip =================================================================== --- /dev/null +++ clang/test/CodeGenHIP/fp-contract.hip @@ -0,0 +1,36 @@ +// By default we should fuse multiply/add into llvm.fmuladd instruction. +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck -check-prefixes ENABLED,ALL %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -ffp-contract=on -fcuda-is-device -o - %s | FileCheck -check-prefixes ENABLED,ALL %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -ffp-contract=fast -fcuda-is-device -o - %s | FileCheck -check-prefixes FAST,ALL %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -ffp-contract=off -fcuda-is-device -o - %s | FileCheck -check-prefixes DISABLED,ALL %s + +#define __device__ __attribute__((device)) + +// ALL-LABEL: func +// ENABLED: call float @llvm.fmuladd.f32 +// FAST: fmul contract float +// FAST-NEXT: fadd contract float +// DISABLED: fmul float +// DISABLED-NEXT: fadd float +__device__ float func(float a, float b, float c) { return a + b * c; } + +// ALL-LABEL: func_on +// ALL: call float @llvm.fmuladd.f32 +#pragma STDC FP_CONTRACT ON +__device__ float func_on(float a, float b, float c) { return a + b * c; } + +// ALL-LABEL: func_off +// ALL: fmul float +// ALL-NEXT: fadd float +#pragma STDC FP_CONTRACT OFF +__device__ float func_off(float a, float b, float c) { return a + b * c; } + +// ALL-LABEL: func_def +// ENABLED: call float @llvm.fmuladd.f32 +// FAST: fmul contract float +// FAST-NEXT: fadd contract float +// DISABLED: fmul float +// DISABLED-NEXT: fadd float +#pragma STDC FP_CONTRACT DEFAULT +__device__ float func_def(float a, float b, float c) { return a + b * c; } Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -2374,6 +2374,10 @@ // Set default FP_CONTRACT to FAST. Opts.setDefaultFPContractMode(LangOptions::FPM_Fast); + // Set default FP_CONTRACT to ON like for OpenCL. + if (Opts.HIP) + Opts.setDefaultFPContractMode(LangOptions::FPM_On); + Opts.RenderScript = IK.getLanguage() == Language::RenderScript; if (Opts.RenderScript) { Opts.NativeHalfType = 1;
Index: clang/test/CodeGenHIP/lit.local.cfg =================================================================== --- /dev/null +++ clang/test/CodeGenHIP/lit.local.cfg @@ -0,0 +1 @@ +config.suffixes = ['.cpp', '.hip'] Index: clang/test/CodeGenHIP/fp-contract.hip =================================================================== --- /dev/null +++ clang/test/CodeGenHIP/fp-contract.hip @@ -0,0 +1,36 @@ +// By default we should fuse multiply/add into llvm.fmuladd instruction. +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck -check-prefixes ENABLED,ALL %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -ffp-contract=on -fcuda-is-device -o - %s | FileCheck -check-prefixes ENABLED,ALL %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -ffp-contract=fast -fcuda-is-device -o - %s | FileCheck -check-prefixes FAST,ALL %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -ffp-contract=off -fcuda-is-device -o - %s | FileCheck -check-prefixes DISABLED,ALL %s + +#define __device__ __attribute__((device)) + +// ALL-LABEL: func +// ENABLED: call float @llvm.fmuladd.f32 +// FAST: fmul contract float +// FAST-NEXT: fadd contract float +// DISABLED: fmul float +// DISABLED-NEXT: fadd float +__device__ float func(float a, float b, float c) { return a + b * c; } + +// ALL-LABEL: func_on +// ALL: call float @llvm.fmuladd.f32 +#pragma STDC FP_CONTRACT ON +__device__ float func_on(float a, float b, float c) { return a + b * c; } + +// ALL-LABEL: func_off +// ALL: fmul float +// ALL-NEXT: fadd float +#pragma STDC FP_CONTRACT OFF +__device__ float func_off(float a, float b, float c) { return a + b * c; } + +// ALL-LABEL: func_def +// ENABLED: call float @llvm.fmuladd.f32 +// FAST: fmul contract float +// FAST-NEXT: fadd contract float +// DISABLED: fmul float +// DISABLED-NEXT: fadd float +#pragma STDC FP_CONTRACT DEFAULT +__device__ float func_def(float a, float b, float c) { return a + b * c; } Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -2374,6 +2374,10 @@ // Set default FP_CONTRACT to FAST. Opts.setDefaultFPContractMode(LangOptions::FPM_Fast); + // Set default FP_CONTRACT to ON like for OpenCL. + if (Opts.HIP) + Opts.setDefaultFPContractMode(LangOptions::FPM_On); + Opts.RenderScript = IK.getLanguage() == Language::RenderScript; if (Opts.RenderScript) { Opts.NativeHalfType = 1;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits