On Wed, Nov 16, 2016 at 11:13:45AM +0100, Nicolai Hähnle wrote:
> Have you looked at the shader-db impact?
> 

shader-db is mostly unchanged.  There are a few decreases in SGPR usage and
code size, and a 4-byte increase in code size for one shader.

> I do think we should eventually do this, but llvm.SI.vs.load.input is
> ReadNone while llvm.amdgcn.buffer.load.* is only ReadOnly, so as long as we
> can't teach LLVM properly about no-aliasing and speculability, there may be
> performance regressions.
> 

Ideally, llvm.amdgcn.buffer.load.* would be ReadOnly and ArgMemOnly, but I
think that as long as it has non-pointer arguments, this combination behaves
the same as ReadNone, which would be incorrect.

-Tom

> Cheers,
> Nicolai
> 
> On 16.11.2016 03:14, Tom Stellard wrote:
> >---
> > src/gallium/drivers/radeonsi/si_shader.c | 69 
> > +++++++++++++++++++++++---------
> > 1 file changed, 50 insertions(+), 19 deletions(-)
> >
> >diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> >b/src/gallium/drivers/radeonsi/si_shader.c
> >index 306e12f..ee4fe2f 100644
> >--- a/src/gallium/drivers/radeonsi/si_shader.c
> >+++ b/src/gallium/drivers/radeonsi/si_shader.c
> >@@ -82,6 +82,17 @@ static void si_build_ps_prolog_function(struct 
> >si_shader_context *ctx,
> > static void si_build_ps_epilog_function(struct si_shader_context *ctx,
> >                                     union si_shader_part_key *key);
> >
> >+static LLVMValueRef build_buffer_load(struct si_shader_context *ctx,
> >+                                      LLVMValueRef rsrc,
> >+                                      int num_channels,
> >+                                      LLVMValueRef vindex,
> >+                                      LLVMValueRef voffset,
> >+                                      LLVMValueRef soffset,
> >+                                      unsigned inst_offset,
> >+                                      unsigned glc,
> >+                                      unsigned slc,
> >+                                  bool is_format);
> >+
> > /* Ideally pass the sample mask input to the PS epilog as v13, which
> >  * is its usual location, so that the shader doesn't have to add v_mov.
> >  */
> >@@ -368,6 +379,31 @@ static LLVMValueRef get_instance_index_for_fetch(
> >                         LLVMGetParam(radeon_bld->main_fn, 
> > param_start_instance), "");
> > }
> >
> >+static LLVMValueRef build_vs_load_input(struct si_shader_context *ctx,
> >+                                    LLVMValueRef rsrc,
> >+                                    LLVMValueRef index,
> >+                                    LLVMValueRef offset) {
> >+
> >+    struct lp_build_context *base = &ctx->soa.bld_base.base;
> >+    struct lp_build_context *uint = &ctx->soa.bld_base.uint_bld;
> >+    struct gallivm_state *gallivm = base->gallivm;
> >+
> >+    LLVMValueRef args[8];
> >+
> >+    if (HAVE_LLVM < 0x0400) {
> >+            args[0] = rsrc;
> >+            args[1] = offset;
> >+            args[2] = index;
> >+
> >+            return lp_build_intrinsic(gallivm->builder,
> >+                    "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
> >+                    LP_FUNC_ATTR_READNONE);
> >+    }
> >+
> >+    return build_buffer_load(ctx, rsrc, 4, index, offset,
> >+                             uint->zero, 0, 0, 0, true);
> >+}
> >+
> > static void declare_input_vs(
> >     struct si_shader_context *ctx,
> >     unsigned input_index,
> >@@ -385,7 +421,6 @@ static void declare_input_vs(
> >     LLVMValueRef t_list;
> >     LLVMValueRef attribute_offset;
> >     LLVMValueRef buffer_index;
> >-    LLVMValueRef args[3];
> >     LLVMValueRef input;
> >
> >     /* Load the T list */
> >@@ -402,12 +437,8 @@ static void declare_input_vs(
> >                                 ctx->param_vertex_index0 +
> >                                 input_index);
> >
> >-    args[0] = t_list;
> >-    args[1] = attribute_offset;
> >-    args[2] = buffer_index;
> >-    input = lp_build_intrinsic(gallivm->builder,
> >-            "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
> >-            LP_FUNC_ATTR_READNONE);
> >+    input = build_vs_load_input(ctx, t_list, buffer_index,
> >+                                attribute_offset);
> >
> >     /* Break up the vec4 into individual components */
> >     for (chan = 0; chan < 4; chan++) {
> >@@ -808,7 +839,8 @@ static LLVMValueRef build_buffer_load(struct 
> >si_shader_context *ctx,
> >                                       LLVMValueRef soffset,
> >                                       unsigned inst_offset,
> >                                       unsigned glc,
> >-                                      unsigned slc)
> >+                                      unsigned slc,
> >+                                  bool is_format)
> > {
> >     struct gallivm_state *gallivm = &ctx->gallivm;
> >     unsigned func = CLAMP(num_channels, 1, 3) - 1;
> >@@ -837,8 +869,8 @@ static LLVMValueRef build_buffer_load(struct 
> >si_shader_context *ctx,
> >                                            "");
> >             }
> >
> >-            snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
> >-                     type_names[func]);
> >+            snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s%s",
> >+                     is_format ? "format." : "", type_names[func]);
> >
> >             return lp_build_intrinsic(gallivm->builder, name, types[func], 
> > args,
> >                                       ARRAY_SIZE(args), 
> > LP_FUNC_ATTR_READONLY);
> >@@ -889,14 +921,14 @@ static LLVMValueRef buffer_load(struct 
> >lp_build_tgsi_context *bld_base,
> >
> >     if (swizzle == ~0) {
> >             value = build_buffer_load(ctx, buffer, 4, NULL, base, offset,
> >-                                      0, 1, 0);
> >+                                      0, 1, 0, false);
> >
> >             return LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
> >     }
> >
> >     if (!tgsi_type_is_64bit(type)) {
> >             value = build_buffer_load(ctx, buffer, 4, NULL, base, offset,
> >-                                      0, 1, 0);
> >+                                      0, 1, 0, false);
> >
> >             value = LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
> >             return LLVMBuildExtractElement(gallivm->builder, value,
> >@@ -904,10 +936,10 @@ static LLVMValueRef buffer_load(struct 
> >lp_build_tgsi_context *bld_base,
> >     }
> >
> >     value = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
> >-                              swizzle * 4, 1, 0);
> >+                              swizzle * 4, 1, 0, false);
> >
> >     value2 = build_buffer_load(ctx, buffer, 1, NULL, base, offset,
> >-                               swizzle * 4 + 4, 1, 0);
> >+                               swizzle * 4 + 4, 1, 0, false);
> >
> >     return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
> > }
> >@@ -4779,11 +4811,10 @@ static void build_tex_intrinsic(const struct 
> >lp_build_tgsi_action *action,
> >     const char *infix = "";
> >
> >     if (target == TGSI_TEXTURE_BUFFER) {
> >-            emit_data->output[emit_data->chan] = lp_build_intrinsic(
> >-                    base->gallivm->builder,
> >-                    "llvm.SI.vs.load.input", emit_data->dst_type,
> >-                    emit_data->args, emit_data->arg_count,
> >-                    LP_FUNC_ATTR_READNONE);
> >+            emit_data->output[emit_data->chan] =
> >+                    build_vs_load_input(ctx, emit_data->args[0],
> >+                                        emit_data->args[2],
> >+                                        emit_data->args[1]);
> >             return;
> >     }
> >
> >
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to