llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-flang-driver Author: Michael Klemm (mjklemm) <details> <summary>Changes</summary> This patch adds direct code-gen support for a faster MOD intrinsic for REAL types. Flang has maintained and keeps maintaining a high-precision implementation of the MOD intrinsic as part of the Fortran runtime. With the -ffast-real-mod flag, users can opt to avoid calling into the Fortran runtime, but instead trigger code-gen that produces faster code by avoiding the runtime call, at the expense of potentially risking bit cancelation by having the compiler use the MOD formula as specified by ISO Fortran. --- Full diff: https://github.com/llvm/llvm-project/pull/160660.diff 7 Files Affected: - (modified) clang/include/clang/Driver/Options.td (+1) - (modified) clang/lib/Driver/ToolChains/Flang.cpp (+3) - (modified) flang/include/flang/Support/LangOptions.def (+2-1) - (modified) flang/lib/Frontend/CompilerInvocation.cpp (+4) - (modified) flang/lib/Frontend/FrontendActions.cpp (+8) - (modified) flang/lib/Optimizer/Builder/IntrinsicCall.cpp (+34-3) - (added) flang/test/Lower/Intrinsics/fast-real-mod.f90 (+75) ``````````diff diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a7c514e809aa9..4dc4acd5603cb 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2750,6 +2750,7 @@ def fno_unsafe_math_optimizations : Flag<["-"], "fno-unsafe-math-optimizations"> Group<f_Group>; def fassociative_math : Flag<["-"], "fassociative-math">, Visibility<[ClangOption, FlangOption]>, Group<f_Group>; def fno_associative_math : Flag<["-"], "fno-associative-math">, Visibility<[ClangOption, FlangOption]>, Group<f_Group>; +def ffast_real_mod : Flag<["-"], "ffast-real-mod">, Visibility<[FlangOption, FC1Option]>, Group<f_Group>; defm reciprocal_math : BoolFOption<"reciprocal-math", LangOpts<"AllowRecip">, DefaultFalse, PosFlag<SetTrue, [], [ClangOption, CC1Option, FC1Option, 
FlangOption], diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1535f4cebf436..fbaa083d204b8 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -766,6 +766,9 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, if (ReciprocalMath) CmdArgs.push_back("-freciprocal-math"); + + if (Args.hasArg(options::OPT_ffast_real_mod)) + CmdArgs.push_back("-ffast-real-mod"); } static void renderRemarksOptions(const ArgList &Args, ArgStringList &CmdArgs, diff --git a/flang/include/flang/Support/LangOptions.def b/flang/include/flang/Support/LangOptions.def index ba72d7b4b7212..e310ecf37a52d 100644 --- a/flang/include/flang/Support/LangOptions.def +++ b/flang/include/flang/Support/LangOptions.def @@ -60,7 +60,8 @@ LANGOPT(OpenMPNoThreadState, 1, 0) LANGOPT(OpenMPNoNestedParallelism, 1, 0) /// Use SIMD only OpenMP support. LANGOPT(OpenMPSimd, 1, false) - +/// Enable fast MOD operations for REAL +LANGOPT(FastRealMod, 1, false) LANGOPT(VScaleMin, 32, 0) ///< Minimum vscale range value LANGOPT(VScaleMax, 32, 0) ///< Maximum vscale range value diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 6295a58b1bdad..5b3f64971013e 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1424,6 +1424,10 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc, opts.setFPContractMode(Fortran::common::LangOptions::FPM_Fast); } + if (args.hasArg(clang::driver::options::OPT_ffast_real_mod)) { + opts.FastRealMod = true; + } + return true; } diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 3bef6b1c31825..d22124bc0bdeb 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -277,6 +277,14 @@ bool CodeGenAction::beginSourceFileAction() { ci.getInvocation().getLangOpts().OpenMPVersion); } + 
if (ci.getInvocation().getLangOpts().FastRealMod) { + auto mod = lb.getModule(); + mod.getOperation()->setAttr( + mlir::StringAttr::get(mod.getContext(), + llvm::Twine{"fir.fast_real_mod"}), + mlir::BoolAttr::get(mod.getContext(), true)); + } + // Create a parse tree and lower it to FIR parseAndLowerTree(ci, lb); diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index ce1376fd209cc..5e0e4fbf81717 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -7009,8 +7009,30 @@ mlir::Value IntrinsicLibrary::genMergeBits(mlir::Type resultType, } // MOD +static mlir::Value genFastMod(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value a, mlir::Value p) { + auto fastmathFlags = mlir::arith::FastMathFlags::contract; + auto fastmathAttr = + mlir::arith::FastMathFlagsAttr::get(builder.getContext(), fastmathFlags); + mlir::Value divResult = mlir::arith::DivFOp::create(builder, loc, a, p, fastmathAttr); + mlir::Type intType = builder.getIntegerType( + a.getType().getIntOrFloatBitWidth(), /*signed=*/true); + mlir::Value intResult = builder.createConvert(loc, intType, divResult); + mlir::Value cnvResult = builder.createConvert(loc, a.getType(), intResult); + mlir::Value mulResult = + mlir::arith::MulFOp::create(builder, loc, cnvResult, p, fastmathAttr); + mlir::Value subResult = + mlir::arith::SubFOp::create(builder, loc, a, mulResult, fastmathAttr); + return subResult; +} + mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType, llvm::ArrayRef<mlir::Value> args) { + auto mod = builder.getModule(); + bool useFastRealMod = false; + if (auto attr = mod->getAttrOfType<mlir::BoolAttr>("fir.fast_real_mod")) + useFastRealMod = attr.getValue(); + assert(args.size() == 2); if (resultType.isUnsignedInteger()) { mlir::Type signlessType = mlir::IntegerType::get( @@ -7022,9 +7044,18 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType, if 
(mlir::isa<mlir::IntegerType>(resultType)) return mlir::arith::RemSIOp::create(builder, loc, args[0], args[1]); - // Use runtime. - return builder.createConvert( - loc, resultType, fir::runtime::genMod(builder, loc, args[0], args[1])); + if (useFastRealMod) { + // If fast MOD for REAL has been requested, generate less precise, + // but faster code directly. + assert(resultType.isFloat() && + "non floating-point type hit for fast real MOD"); + return builder.createConvert(loc, resultType, + genFastMod(builder, loc, args[0], args[1])); + } else { + // Use runtime. + return builder.createConvert( + loc, resultType, fir::runtime::genMod(builder, loc, args[0], args[1])); + } } // MODULO diff --git a/flang/test/Lower/Intrinsics/fast-real-mod.f90 b/flang/test/Lower/Intrinsics/fast-real-mod.f90 new file mode 100644 index 0000000000000..00607fa5c30d1 --- /dev/null +++ b/flang/test/Lower/Intrinsics/fast-real-mod.f90 @@ -0,0 +1,75 @@ +! RUN: %flang_fc1 -ffast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%} + +! CHECK: module attributes {{{.*}}fir.fast_real_mod = true{{.*}}} + +! CHECK-LABEL: @_QPmod_real4 +subroutine mod_real4(r, a, p) + implicit none + real(kind=4) :: r, a, p +! CHECK: %[[A:.*]] = fir.declare{{.*}}a" +! CHECK: %[[P:.*]] = fir.declare{{.*}}p" +! CHECK: %[[R:.*]] = fir.declare{{.*}}r" +! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]] +! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]] +! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f32 +! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f32) -> si32 +! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si32) -> f32 +! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f32 +! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f32 +! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f32> + r = mod(a, p) +end subroutine mod_real4 + +! 
CHECK-LABEL: @_QPmod_real8 +subroutine mod_real8(r, a, p) + implicit none + real(kind=8) :: r, a, p +! CHECK: %[[A:.*]] = fir.declare{{.*}}a" +! CHECK: %[[P:.*]] = fir.declare{{.*}}p" +! CHECK: %[[R:.*]] = fir.declare{{.*}}r" +! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]] +! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]] +! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f64 +! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f64) -> si64 +! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si64) -> f64 +! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f64 +! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f64 +! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f64> + r = mod(a, p) +end subroutine mod_real8 + +! CHECK-LABEL: @_QPmod_real10 +subroutine mod_real10(r, a, p) + implicit none + real(kind=10) :: r, a, p +! CHECK-KIND10: %[[A:.*]] = fir.declare{{.*}}a" +! CHECK-KIND10: %[[P:.*]] = fir.declare{{.*}}p" +! CHECK-KIND10: %[[R:.*]] = fir.declare{{.*}}r" +! CHECK-KIND10: %[[A_LOAD:.*]] = fir.load %[[A]] +! CHECK-KIND10: %[[P_LOAD:.*]] = fir.load %[[P]] +! CHECK-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f80 +! CHECK-KIND10: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80 +! CHECK-KIND10: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80 +! CHECK-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f80 +! CHECK-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f80 +! CHECK-KIND10: fir.store %[[SUB]] to %[[R]] : !fir.ref<f80> + r = mod(a, p) +end subroutine mod_real10 + +! CHECK-LABEL: @_QPmod_real16 +subroutine mod_real16(r, a, p) + implicit none + real(kind=16) :: r, a, p +! CHECK-KIND16: %[[A:.*]] = fir.declare{{.*}}a" +! CHECK-KIND16: %[[P:.*]] = fir.declare{{.*}}p" +! CHECK-KIND16: %[[R:.*]] = fir.declare{{.*}}r" +! CHECK-KIND16: %[[A_LOAD:.*]] = fir.load %[[A]] +! 
CHECK-KIND16: %[[P_LOAD:.*]] = fir.load %[[P]] +! CHECK-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f128 +! CHECK-KIND16: %[[CV1:.*]] = fir.convert %[[DIV]] : (f128) -> si128 +! CHECK-KIND16: %[[CV2:.*]] = fir.convert %[[CV1]] : (si128) -> f128 +! CHECK-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f128 +! CHECK-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f128 +! CHECK-KIND16: fir.store %[[SUB]] to %[[R]] : !fir.ref<f128> + r = mod(a, p) +end subroutine mod_real16 `````````` </details> https://github.com/llvm/llvm-project/pull/160660 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
