On 19.05.2017 17:54, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

---
 src/amd/common/ac_llvm_build.c           | 50 ++++++++++++++++++++++++--------
 src/amd/common/ac_llvm_build.h           |  3 +-
 src/amd/common/ac_nir_to_llvm.c          |  2 +-
 src/gallium/drivers/radeonsi/si_shader.c | 23 +++++++--------
 4 files changed, 51 insertions(+), 27 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 87a1fb7..4b048ba 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -626,47 +626,73 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 LLVMValueRef
 ac_build_buffer_load(struct ac_llvm_context *ctx,
                     LLVMValueRef rsrc,
                     int num_channels,
                     LLVMValueRef vindex,
                     LLVMValueRef voffset,
                     LLVMValueRef soffset,
                     unsigned inst_offset,
                     unsigned glc,
                     unsigned slc,
-                    bool readonly_memory)
+                    bool readonly_memory,
+                    bool coherent)

I think the series is basically fine, but the parameter name "coherent" here is potentially confusing: "coherent" is already the name of a memory qualifier in shaders, and that qualifier sets glc.

Are there any buffer loads with readonly_memory && !glc && !vindex && !voffset where we aren't allowed to use SMEM? If there are none, we could just use that condition to control whether llvm.SI.load.const is used, instead of adding a new parameter.

I seem to recall that there were some subtle differences in bounds checking, and perhaps those prevent the use of SMEM sometimes. But then I'd rather use a more explicit parameter name like smem_forbidden or smem_allowed.

Cheers,
Nicolai


 {
+       LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0);
+       if (voffset)
+               offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
+       if (soffset)
+               offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
+
+       /* Loads from a shader buffer that has no stores in the same shader
+        * and is non-coherent with other shader invocations can use SMEM.
+        */
+       if (readonly_memory && !coherent) {
+               assert(vindex == NULL);
+               assert(glc == 0);
+               assert(slc == 0);
+
+               LLVMValueRef result[4];
+
+               for (int i = 0; i < num_channels; i++) {
+                       if (i) {
+                               offset = LLVMBuildAdd(ctx->builder, offset,
+                                                     LLVMConstInt(ctx->i32, 4, 0), 
"");
+                       }
+                       LLVMValueRef args[2] = {rsrc, offset};
+                       result[i] = ac_build_intrinsic(ctx, 
"llvm.SI.load.const.v4i32",
+                                                      ctx->f32, args, 2,
+                                                      AC_FUNC_ATTR_READNONE |
+                                                      AC_FUNC_ATTR_LEGACY);
+               }
+               if (num_channels == 1)
+                       return result[0];
+
+               if (num_channels == 3)
+                       result[num_channels++] = LLVMGetUndef(ctx->f32);
+               return ac_build_gather_values(ctx, result, num_channels);
+       }
+
        unsigned func = CLAMP(num_channels, 1, 3) - 1;

        LLVMValueRef args[] = {
                LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
                vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
-               LLVMConstInt(ctx->i32, inst_offset, 0),
+               offset,
                LLVMConstInt(ctx->i1, glc, 0),
                LLVMConstInt(ctx->i1, slc, 0)
        };

        LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
                               ctx->v4f32};
        const char *type_names[] = {"f32", "v2f32", "v4f32"};
        char name[256];

-       if (voffset) {
-               args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset,
-                               "");
-       }
-
-       if (soffset) {
-               args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset,
-                               "");
-       }
-
        snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
                 type_names[func]);

        return ac_build_intrinsic(ctx, name, types[func], args,
                                  ARRAY_SIZE(args),
                                  /* READNONE means writes can't affect it, 
while
                                   * READONLY means that writes can affect it. 
*/
                                  readonly_memory && HAVE_LLVM >= 0x0400 ?
                                          AC_FUNC_ATTR_READNONE :
                                          AC_FUNC_ATTR_READONLY);
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 0ecbc4a..754461b 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -136,21 +136,22 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 LLVMValueRef
 ac_build_buffer_load(struct ac_llvm_context *ctx,
                     LLVMValueRef rsrc,
                     int num_channels,
                     LLVMValueRef vindex,
                     LLVMValueRef voffset,
                     LLVMValueRef soffset,
                     unsigned inst_offset,
                     unsigned glc,
                     unsigned slc,
-                    bool readonly_memory);
+                    bool readonly_memory,
+                    bool coherent);

 LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
                                         LLVMValueRef rsrc,
                                         LLVMValueRef vindex,
                                         LLVMValueRef voffset,
                                         bool readonly_memory);

 LLVMValueRef
 ac_get_thread_id(struct ac_llvm_context *ctx);

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 8ae0a75..9794c70 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2816,21 +2816,21 @@ load_tes_input(struct nir_to_llvm_context *ctx,
        param = 
shader_io_get_unique_index(instr->variables[0]->var->data.location);
        if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 
&&
            is_compact && const_index > 3) {
                const_index -= 3;
                param++;
        }
        buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
                                                     is_compact, vertex_index, 
indir_index);

        result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, 
instr->num_components, NULL,
-                                     buf_addr, ctx->oc_lds, is_compact ? (4 * 
const_index) : 0, 1, 0, true);
+                                     buf_addr, ctx->oc_lds, is_compact ? (4 * 
const_index) : 0, 1, 0, true, true);
        result = trim_vector(ctx, result, instr->num_components);
        result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, 
&instr->dest.ssa), "");
        return result;
 }

 static LLVMValueRef
 load_gs_input(struct nir_to_llvm_context *ctx,
              nir_intrinsic_instr *instr)
 {
        LLVMValueRef indir_index, vtx_offset;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 61f1384..7dd121b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -826,39 +826,39 @@ static LLVMValueRef buffer_load(struct 
lp_build_tgsi_context *bld_base,
                                 LLVMValueRef base, bool readonly_memory)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef value, value2;
        LLVMTypeRef llvm_type = tgsi2llvmtype(bld_base, type);
        LLVMTypeRef vec_type = LLVMVectorType(llvm_type, 4);

        if (swizzle == ~0) {
                value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, 
offset,
-                                            0, 1, 0, readonly_memory);
+                                            0, 1, 0, readonly_memory, true);

                return LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
        }

        if (!tgsi_type_is_64bit(type)) {
                value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, 
offset,
-                                            0, 1, 0, readonly_memory);
+                                            0, 1, 0, readonly_memory, true);

                value = LLVMBuildBitCast(gallivm->builder, value, vec_type, "");
                return LLVMBuildExtractElement(gallivm->builder, value,
                                    LLVMConstInt(ctx->i32, swizzle, 0), "");
        }

        value = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
-                                 swizzle * 4, 1, 0, readonly_memory);
+                                 swizzle * 4, 1, 0, readonly_memory, true);

        value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
-                                  swizzle * 4 + 4, 1, 0, readonly_memory);
+                                  swizzle * 4 + 4, 1, 0, readonly_memory, 
true);

        return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
 }

 /**
  * Load from LDS.
  *
  * \param type         output value type
  * \param swizzle      offset (typically 0..3); it can be ~0, which loads a 
vec4
  * \param dw_addr      address in dwords
@@ -1147,28 +1147,28 @@ static LLVMValueRef fetch_input_gs(
                vtx_offset_param += ctx->param_gs_vtx2_offset - 2;
        }
        vtx_offset = lp_build_mul_imm(uint,
                                      LLVMGetParam(ctx->main_fn,
                                                   vtx_offset_param),
                                      4);

        soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle) * 256, 0);

        value = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->i32_0,
-                                    vtx_offset, soffset, 0, 1, 0, true);
+                                    vtx_offset, soffset, 0, 1, 0, true, true);
        if (tgsi_type_is_64bit(type)) {
                LLVMValueRef value2;
                soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle + 1) * 
256, 0);

                value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1,
                                              ctx->i32_0, vtx_offset, soffset,
-                                             0, 1, 0, true);
+                                             0, 1, 0, true, true);
                return si_llvm_emit_fetch_64bit(bld_base, type,
                                                value, value2);
        }
        return LLVMBuildBitCast(gallivm->builder,
                                value,
                                tgsi2llvmtype(bld_base, type), "");
 }

 static int lookup_interp_param_index(unsigned interpolate, unsigned location)
 {
@@ -1382,26 +1382,22 @@ static LLVMValueRef get_sample_id(struct 
si_shader_context *ctx)
 }


 /**
  * Load a dword from a constant buffer.
  */
 static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
                                      LLVMValueRef resource,
                                      LLVMValueRef offset)
 {
-       LLVMBuilderRef builder = ctx->gallivm.builder;
-       LLVMValueRef args[2] = {resource, offset};
-
-       return lp_build_intrinsic(builder, "llvm.SI.load.const.v4i32", 
ctx->f32, args, 2,
-                                 LP_FUNC_ATTR_READNONE |
-                                 LP_FUNC_ATTR_LEGACY);
+       return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL,
+                                   0, 0, 0, true, false);
 }

 static LLVMValueRef load_sample_position(struct si_shader_context *ctx, 
LLVMValueRef sample_id)
 {
        struct lp_build_context *uint_bld = &ctx->bld_base.uint_bld;
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        LLVMValueRef desc = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
        LLVMValueRef buf_index = LLVMConstInt(ctx->i32, 
SI_PS_CONST_SAMPLE_POSITIONS, 0);
        LLVMValueRef resource = ac_build_indexed_load_const(&ctx->ac, desc, 
buf_index);
@@ -5192,21 +5188,22 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
                                }

                                LLVMValueRef soffset = LLVMConstInt(ctx.i32,
                                        offset * 
gs_selector->gs_max_out_vertices * 16 * 4, 0);
                                offset++;

                                outputs[i].values[chan] =
                                        ac_build_buffer_load(&ctx.ac,
                                                             ctx.gsvs_ring[0], 
1,
                                                             ctx.i32_0, voffset,
-                                                            soffset, 0, 1, 1, 
true);
+                                                            soffset, 0, 1, 1,
+                                                            true, true);
                        }
                }

                /* Streamout and exports. */
                if (gs_selector->so.num_outputs) {
                        si_llvm_emit_streamout(&ctx, outputs,
                                               gsinfo->num_outputs,
                                               stream);
                }




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to