Module: Mesa
Branch: master
Commit: df1d5174fccc6771e24ef09e0cd77dfa377a7b6a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=df1d5174fccc6771e24ef09e0cd77dfa377a7b6a

Author: Samuel Pitoiset <samuel.pitoi...@gmail.com>
Date:   Thu Feb  1 16:37:15 2018 +0100

ac/nir: replace SI.buffer.load.dword with amdgcn.buffer.load

The old intrinsic generates useless instructions; this was found while
comparing geometry shaders between RadeonSI and RADV.

This improves performance in all Vulkan demos that use geometry
shaders: +4% for deferredshadows, +9% for viewportarray, and +7% for
geometryshader on Polaris10.

This seems to also improve DOW3 a little bit (+1%).

Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
Reviewed-by:  Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl>

---

 src/amd/common/ac_nir_to_llvm.c | 52 ++++++++++++++++-------------------------
 1 file changed, 20 insertions(+), 32 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 05b937803f..0f7d6258ac 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3047,7 +3047,6 @@ load_gs_input(struct ac_shader_abi *abi,
 {
        struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
        LLVMValueRef vtx_offset;
-       LLVMValueRef args[9];
        unsigned param, vtx_offset_param;
        LLVMValueRef value[4], result;
 
@@ -3065,20 +3064,16 @@ load_gs_input(struct ac_shader_abi *abi,
                                               LLVMConstInt(ctx->ac.i32, param 
* 4 + i + const_index, 0), "");
                        value[i] = ac_lds_load(&ctx->ac, dw_addr);
                } else {
-                       args[0] = ctx->esgs_ring;
-                       args[1] = vtx_offset;
-                       args[2] = LLVMConstInt(ctx->ac.i32, (param * 4 + i + 
const_index) * 256, false);
-                       args[3] = ctx->ac.i32_0;
-                       args[4] = ctx->ac.i32_1; /* OFFEN */
-                       args[5] = ctx->ac.i32_0; /* IDXEN */
-                       args[6] = ctx->ac.i32_1; /* GLC */
-                       args[7] = ctx->ac.i32_0; /* SLC */
-                       args[8] = ctx->ac.i32_0; /* TFE */
-
-                       value[i] = ac_build_intrinsic(&ctx->ac, 
"llvm.SI.buffer.load.dword.i32.i32",
-                                                     ctx->ac.i32, args, 9,
-                                                     AC_FUNC_ATTR_READONLY |
-                                                     AC_FUNC_ATTR_LEGACY);
+                       LLVMValueRef soffset =
+                               LLVMConstInt(ctx->ac.i32,
+                                            (param * 4 + i + const_index) * 
256,
+                                            false);
+
+                       value[i] = ac_build_buffer_load(&ctx->ac,
+                                                       ctx->esgs_ring, 1,
+                                                       ctx->ac.i32_0,
+                                                       vtx_offset, soffset,
+                                                       0, 1, 0, true, false);
                }
        }
        result = ac_build_varying_gather_values(&ctx->ac, value, 
num_components, component);
@@ -7166,16 +7161,9 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm,
 static void
 ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
 {
-       LLVMValueRef args[9];
-       args[0] = ctx->gsvs_ring;
-       args[1] = LLVMBuildMul(ctx->builder, ctx->abi.vertex_id, 
LLVMConstInt(ctx->ac.i32, 4, false), "");
-       args[3] = ctx->ac.i32_0;
-       args[4] = ctx->ac.i32_1;  /* OFFEN */
-       args[5] = ctx->ac.i32_0; /* IDXEN */
-       args[6] = ctx->ac.i32_1;  /* GLC */
-       args[7] = ctx->ac.i32_1;  /* SLC */
-       args[8] = ctx->ac.i32_0; /* TFE */
-
+       LLVMValueRef vtx_offset =
+               LLVMBuildMul(ctx->builder, ctx->abi.vertex_id,
+                            LLVMConstInt(ctx->ac.i32, 4, false), "");
        int idx = 0;
 
        for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
@@ -7193,16 +7181,16 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
                }
 
                for (unsigned j = 0; j < length; j++) {
-                       LLVMValueRef value;
-                       args[2] = LLVMConstInt(ctx->ac.i32,
+                       LLVMValueRef value, soffset;
+
+                       soffset = LLVMConstInt(ctx->ac.i32,
                                               (slot * 4 + j) *
                                               ctx->gs_max_out_vertices * 16 * 
4, false);
 
-                       value = ac_build_intrinsic(&ctx->ac,
-                                                  
"llvm.SI.buffer.load.dword.i32.i32",
-                                                  ctx->ac.i32, args, 9,
-                                                  AC_FUNC_ATTR_READONLY |
-                                                  AC_FUNC_ATTR_LEGACY);
+                       value = ac_build_buffer_load(&ctx->ac, ctx->gsvs_ring,
+                                                    1, ctx->ac.i32_0,
+                                                    vtx_offset, soffset,
+                                                    0, 1, 1, true, false);
 
                        LLVMBuildStore(ctx->builder,
                                       ac_to_float(&ctx->ac, value), 
ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)]);

_______________________________________________
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to