Module: Mesa
Branch: staging/20.0
Commit: dfc0a5cc14d33ea94555d4f4b316485f23cb1bd1
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=dfc0a5cc14d33ea94555d4f4b316485f23cb1bd1

Author: Samuel Pitoiset <[email protected]>
Date:   Thu Mar 26 11:40:35 2020 +0100

ac/nir: use llvm.amdgcn.rcp in ac_build_fdiv()

Emit llvm.amdgcn.rcp directly instead of 1.0 / x, which includes a slow
division that LLVM doesn't always optimize even if the fpmath metadata is
correctly set.

No pipeline-db changes with VEGA10/LLVM 9.

pipeline-db (VEGA10/LLVM 10):
Totals from affected shaders:
SGPRS: 6672 -> 6672 (0.00 %)
VGPRS: 6652 -> 6652 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Code Size: 561780 -> 561692 (-0.02 %) bytes
Max Waves: 1043 -> 1043 (0.00 %)

pipeline-db (VEGA10/LLVM 11 - 92744f62478):
Totals from affected shaders:
SGPRS: 84608 -> 83768 (-0.99 %)
VGPRS: 106768 -> 106636 (-0.12 %)
Spilled SGPRs: 1625 -> 1713 (5.42 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Code Size: 10850936 -> 10726712 (-1.14 %) bytes
Max Waves: 3152 -> 3180 (0.89 %)

LLVM 11 (master) is more affected than previous versions, but
based on the small impact with LLVM 9/10, I decided to emit it
unconditionally.

Cc: 20.0 <[email protected]>
Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Bas Nieuwenhuizen <[email protected]>
Reviewed-by: Marek Olšák <[email protected]>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4326>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4326>
(cherry picked from commit ba2ec1f369d2c97fc7c54ecd52b0addcfd349a31)

---

 .pick_status.json            |  2 +-
 src/amd/llvm/ac_llvm_build.c | 30 ++++++++++++------------------
 src/amd/llvm/ac_llvm_build.h |  2 --
 3 files changed, 13 insertions(+), 21 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 3186759be9f..3016971a806 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -607,7 +607,7 @@
         "description": "ac/nir: use llvm.amdgcn.rcp in ac_build_fdiv()",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index 6532c57a234..4b2331a524a 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -65,8 +65,6 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
                     enum ac_float_mode float_mode, unsigned wave_size,
                     unsigned ballot_mask_bits)
 {
-       LLVMValueRef args[1];
-
        ctx->context = LLVMContextCreate();
 
        ctx->chip_class = chip_class;
@@ -127,11 +125,6 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
        ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
                                                               
"invariant.load", 14);
 
-       ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 
6);
-
-       args[0] = LLVMConstReal(ctx->f32, 2.5);
-       ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
-
        ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
                                                        "amdgpu.uniform", 14);
 
@@ -707,17 +700,18 @@ ac_build_fdiv(struct ac_llvm_context *ctx,
              LLVMValueRef num,
              LLVMValueRef den)
 {
-       /* If we do (num / den), LLVM >= 7.0 does:
-        *    return num * v_rcp_f32(den * (fabs(den) > 0x1.0p+96f ? 0x1.0p-32f 
: 1.0f));
-        *
-        * If we do (num * (1 / den)), LLVM does:
-        *    return num * v_rcp_f32(den);
-        */
-       LLVMValueRef one = LLVMConstReal(LLVMTypeOf(num), 1.0);
-       LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, "");
-       /* Use v_rcp_f32 instead of precise division. */
-       if (!LLVMIsConstant(rcp))
-               LLVMSetMetadata(rcp, ctx->fpmath_md_kind, 
ctx->fpmath_md_2p5_ulp);
+       unsigned type_size = ac_get_type_size(LLVMTypeOf(den));
+       const char *name;
+
+       if (type_size == 2)
+               name = "llvm.amdgcn.rcp.f16";
+       else if (type_size == 4)
+               name = "llvm.amdgcn.rcp.f32";
+       else
+               name = "llvm.amdgcn.rcp.f64";
+
+        LLVMValueRef rcp = ac_build_intrinsic(ctx, name, LLVMTypeOf(den),
+                                              &den, 1, AC_FUNC_ATTR_READNONE);
 
        return LLVMBuildFMul(ctx->builder, num, rcp, "");
 }
diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h
index 772054efecd..e08ab656f9c 100644
--- a/src/amd/llvm/ac_llvm_build.h
+++ b/src/amd/llvm/ac_llvm_build.h
@@ -117,8 +117,6 @@ struct ac_llvm_context {
        unsigned range_md_kind;
        unsigned invariant_load_md_kind;
        unsigned uniform_md_kind;
-       unsigned fpmath_md_kind;
-       LLVMValueRef fpmath_md_2p5_ulp;
        LLVMValueRef empty_md;
 
        enum chip_class chip_class;

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to