From: Nicolai Hähnle <nicolai.haeh...@amd.com>

In preparation for dimension-aware LLVM image intrinsics.
---
 src/amd/common/ac_llvm_build.c                     | 105 +++++++---
 src/amd/common/ac_llvm_build.h                     |  37 +++-
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h |   2 +-
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c  | 230 +++++++++------------
 4 files changed, 210 insertions(+), 164 deletions(-)
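Reviewer aid, not part of the patch: after this change, image load, store and
atomic emission all funnel through ac_build_image_opcode(), with the per-opcode
differences carried in struct ac_image_args. A minimal sketch of the new
calling convention for a plain image load, mirroring the load_emit hunk below;
the wrapper name and the rsrc/coords/coherent/can_speculate parameters are
placeholders, not code from this series:

/* Sketch only: emitting an image load through the unified helper.
 * emit_image_load_sketch is hypothetical; rsrc, coords (4 entries),
 * coherent and can_speculate stand in for values the caller has. */
static LLVMValueRef
emit_image_load_sketch(struct si_shader_context *ctx, LLVMValueRef rsrc,
		       LLVMValueRef coords[4], bool coherent,
		       bool can_speculate)
{
	struct ac_image_args args = {};

	args.opcode = ac_image_load;
	args.resource = rsrc;		/* v8i32 image descriptor */
	memcpy(args.coords, coords, sizeof(args.coords));
	args.dim = ac_image_2d;		/* e.g.; radeonsi derives this from
					 * the TGSI target, see below */
	args.dmask = 0xf;
	if (coherent)
		args.cache_policy = ac_glc;
	args.attributes = ac_get_load_intr_attribs(can_speculate);

	return ac_build_image_opcode(&ctx->ac, &args);
}

The power-of-two padding of the coordinate vector still happens inside the
helper, so callers only fill the meaningful components.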
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index edc729c0127..431ec1defb9 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1466,35 +1466,40 @@ static unsigned ac_num_derivs(enum ac_image_dim dim)
 	case ac_image_2darraymsaa:
 	default:
 		unreachable("derivatives not supported");
 	}
 }
 
 LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 				   struct ac_image_args *a)
 {
 	LLVMValueRef args[16];
+	LLVMTypeRef retty = ctx->v4f32;
 	const char *name = NULL;
-	char intr_name[128], type[64];
+	const char *atomic_subop = "";
+	char intr_name[128], coords_type[64];
 
 	assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 ||
 	       !a->level_zero);
-	assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip) ||
+	assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip &&
+		a->opcode != ac_image_store_mip) ||
 	       a->lod);
 	assert((a->bias ? 1 : 0) +
 	       (a->lod ? 1 : 0) +
 	       (a->level_zero ? 1 : 0) +
 	       (a->derivs[0] ? 1 : 0) <= 1);
 
 	bool sample = a->opcode == ac_image_sample ||
 		      a->opcode == ac_image_gather4 ||
 		      a->opcode == ac_image_get_lod;
+	bool atomic = a->opcode == ac_image_atomic ||
+		      a->opcode == ac_image_atomic_cmpswap;
 	bool da = a->dim == ac_image_cube ||
 		  a->dim == ac_image_1darray ||
 		  a->dim == ac_image_2darray ||
 		  a->dim == ac_image_2darraymsaa;
 
 	if (a->opcode == ac_image_get_lod)
 		da = false;
 
 	unsigned num_coords =
 		a->opcode != ac_image_get_resinfo ? ac_num_coords(a->dim) : 0;
 	LLVMValueRef addr;
@@ -1516,80 +1521,126 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
 	if (a->lod)
 		args[num_addr++] = ac_to_integer(ctx, a->lod);
 
 	unsigned pad_goal = util_next_power_of_two(num_addr);
 	while (num_addr < pad_goal)
 		args[num_addr++] = LLVMGetUndef(ctx->i32);
 
 	addr = ac_build_gather_values(ctx, args, num_addr);
 
 	unsigned num_args = 0;
+	if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) {
+		args[num_args++] = a->data[0];
+		if (a->opcode == ac_image_atomic_cmpswap)
+			args[num_args++] = a->data[1];
+	}
+
+	unsigned coords_arg = num_args;
 	if (sample)
 		args[num_args++] = ac_to_float(ctx, addr);
 	else
 		args[num_args++] = ac_to_integer(ctx, addr);
 
 	args[num_args++] = a->resource;
 	if (sample)
 		args[num_args++] = a->sampler;
-	args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
-	if (sample)
-		args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
-	args[num_args++] = ctx->i1false; /* glc */
-	args[num_args++] = ctx->i1false; /* slc */
-	args[num_args++] = ctx->i1false; /* lwe */
-	args[num_args++] = LLVMConstInt(ctx->i1, da, 0);
+	if (!atomic) {
+		args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
+		if (sample)
+			args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
+		args[num_args++] = a->cache_policy & ac_glc ? ctx->i1true : ctx->i1false;
+		args[num_args++] = a->cache_policy & ac_slc ? ctx->i1true : ctx->i1false;
+		args[num_args++] = ctx->i1false; /* lwe */
+		args[num_args++] = LLVMConstInt(ctx->i1, da, 0);
+	} else {
+		args[num_args++] = ctx->i1false; /* r128 */
+		args[num_args++] = LLVMConstInt(ctx->i1, da, 0);
+		args[num_args++] = a->cache_policy & ac_slc ?
+			ctx->i1true : ctx->i1false;
+	}
 
 	switch (a->opcode) {
 	case ac_image_sample:
 		name = "llvm.amdgcn.image.sample";
 		break;
 	case ac_image_gather4:
 		name = "llvm.amdgcn.image.gather4";
 		break;
 	case ac_image_load:
 		name = "llvm.amdgcn.image.load";
 		break;
 	case ac_image_load_mip:
 		name = "llvm.amdgcn.image.load.mip";
 		break;
+	case ac_image_store:
+		name = "llvm.amdgcn.image.store";
+		retty = ctx->voidt;
+		break;
+	case ac_image_store_mip:
+		name = "llvm.amdgcn.image.store.mip";
+		retty = ctx->voidt;
+		break;
+	case ac_image_atomic:
+	case ac_image_atomic_cmpswap:
+		name = "llvm.amdgcn.image.atomic.";
+		retty = ctx->i32;
+		if (a->opcode == ac_image_atomic_cmpswap) {
+			atomic_subop = "cmpswap";
+		} else {
+			switch (a->atomic) {
+			case ac_atomic_swap: atomic_subop = "swap"; break;
+			case ac_atomic_add: atomic_subop = "add"; break;
+			case ac_atomic_sub: atomic_subop = "sub"; break;
+			case ac_atomic_smin: atomic_subop = "smin"; break;
+			case ac_atomic_umin: atomic_subop = "umin"; break;
+			case ac_atomic_smax: atomic_subop = "smax"; break;
+			case ac_atomic_umax: atomic_subop = "umax"; break;
+			case ac_atomic_and: atomic_subop = "and"; break;
+			case ac_atomic_or: atomic_subop = "or"; break;
+			case ac_atomic_xor: atomic_subop = "xor"; break;
+			}
+		}
+		break;
 	case ac_image_get_lod:
 		name = "llvm.amdgcn.image.getlod";
 		break;
 	case ac_image_get_resinfo:
 		name = "llvm.amdgcn.image.getresinfo";
 		break;
 	default:
 		unreachable("invalid image opcode");
 	}
 
-	ac_build_type_name_for_intr(LLVMTypeOf(args[0]), type,
-				    sizeof(type));
+	ac_build_type_name_for_intr(LLVMTypeOf(args[coords_arg]), coords_type,
+				    sizeof(coords_type));
 
-	bool lod_suffix =
-		a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4);
-
-	snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32",
-		 name,
-		 a->compare ? ".c" : "",
-		 a->bias ? ".b" :
-		 lod_suffix ? ".l" :
-		 a->derivs[0] ? ".d" :
-		 a->level_zero ? ".lz" : "",
-		 a->offset ? ".o" : "",
-		 type);
+	if (atomic) {
+		snprintf(intr_name, sizeof(intr_name), "llvm.amdgcn.image.atomic.%s.%s",
+			 atomic_subop, coords_type);
+	} else {
+		bool lod_suffix =
+			a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4);
+
+		snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32",
+			 name,
+			 a->compare ? ".c" : "",
+			 a->bias ? ".b" :
+			 lod_suffix ? ".l" :
+			 a->derivs[0] ? ".d" :
+			 a->level_zero ? ".lz" : "",
+			 a->offset ? ".o" : "",
+			 coords_type);
+	}
 
 	LLVMValueRef result =
-		ac_build_intrinsic(ctx, intr_name,
-				   ctx->v4f32, args, num_args,
-				   AC_FUNC_ATTR_READNONE);
-	if (!sample) {
+		ac_build_intrinsic(ctx, intr_name, retty, args, num_args,
+				   a->attributes);
+	if (!sample && retty == ctx->v4f32) {
 		result = LLVMBuildBitCast(ctx->builder, result,
 					  ctx->v4i32, "");
 	}
 
 	return result;
 }
 
 LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
 				    LLVMValueRef args[2])
 {
 	if (HAVE_LLVM >= 0x0500) {
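Again a reviewer sketch rather than patch content: the atomic path above drops
dmask/unorm/lwe, switches the return type to i32, and derives the intrinsic
name from the new ac_atomic_op subop. A hedged compare-and-swap example under
the same placeholder assumptions as the earlier load sketch:

/* Sketch only: image atomic compare-and-swap via the unified helper.
 * Per the ac_image_args comment, data[0] is the source value and
 * data[1] the comparison value for cmpswap. */
static LLVMValueRef
emit_image_cmpswap_sketch(struct ac_llvm_context *ac, LLVMValueRef src,
			  LLVMValueRef cmp, LLVMValueRef rsrc,
			  LLVMValueRef coords[4])
{
	struct ac_image_args args = {};

	args.opcode = ac_image_atomic_cmpswap;
	args.data[0] = src;
	args.data[1] = cmp;
	args.resource = rsrc;
	memcpy(args.coords, coords, sizeof(args.coords));
	args.dim = ac_image_2d;		/* placeholder dimension */

	/* Resolves to "llvm.amdgcn.image.atomic.cmpswap.<coords type>". */
	return ac_build_image_opcode(ac, &args);
}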
".o" : "", + coords_type); + } LLVMValueRef result = - ac_build_intrinsic(ctx, intr_name, - ctx->v4f32, args, num_args, - AC_FUNC_ATTR_READNONE); - if (!sample) { + ac_build_intrinsic(ctx, intr_name, retty, args, num_args, + a->attributes); + if (!sample && retty == ctx->v4f32) { result = LLVMBuildBitCast(ctx->builder, result, ctx->v4i32, ""); } return result; } LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, LLVMValueRef args[2]) { if (HAVE_LLVM >= 0x0500) { diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index fcd465ef070..6869ac68ab4 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -306,50 +306,77 @@ struct ac_export_args { void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a); void ac_build_export_null(struct ac_llvm_context *ctx); enum ac_image_opcode { ac_image_sample, ac_image_gather4, ac_image_load, ac_image_load_mip, + ac_image_store, + ac_image_store_mip, ac_image_get_lod, ac_image_get_resinfo, + ac_image_atomic, + ac_image_atomic_cmpswap, +}; + +enum ac_atomic_op { + ac_atomic_swap, + ac_atomic_add, + ac_atomic_sub, + ac_atomic_smin, + ac_atomic_umin, + ac_atomic_smax, + ac_atomic_umax, + ac_atomic_and, + ac_atomic_or, + ac_atomic_xor, }; enum ac_image_dim { ac_image_1d, ac_image_2d, ac_image_3d, ac_image_cube, // includes cube arrays ac_image_1darray, ac_image_2darray, ac_image_2dmsaa, ac_image_2darraymsaa, }; +/* These cache policy bits match the definitions used by the LLVM intrinsics. */ +enum ac_image_cache_policy { + ac_glc = 1 << 0, + ac_slc = 1 << 1, +}; + struct ac_image_args { - enum ac_image_opcode opcode; - enum ac_image_dim dim; + enum ac_image_opcode opcode : 4; + enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */ + enum ac_image_dim dim : 3; + unsigned dmask : 4; + unsigned cache_policy : 2; + bool unorm : 1; + bool level_zero : 1; + unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */ LLVMValueRef resource; LLVMValueRef sampler; + LLVMValueRef data[2]; /* data[0] is source data (vector); data[1] is cmp for cmpswap */ LLVMValueRef offset; LLVMValueRef bias; LLVMValueRef compare; LLVMValueRef derivs[6]; LLVMValueRef coords[4]; LLVMValueRef lod; // also used by ac_image_get_resinfo - unsigned dmask; - bool unorm; - bool level_zero; }; LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a); LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, LLVMValueRef args[2]); LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2]); LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2]); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h index d30f9da539e..dbe614d57c5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h @@ -43,21 +43,21 @@ struct lp_build_emit_data { * order of the arguments should be as follows: * SOA: s0.x, s0.y, s0.z, s0.w, s1.x, s1.y, s1.z, s1.w, s2.x, s2.y, s2.x, s2.w * AOS: s0.xyzw, s1.xyzw, s2.xyzw * TEXTURE Instructions: coord.xyzw * * Arguments should be packed into the args array. For example an SOA * instructions that reads s0.x and s1.x args should look like this: * args[0] = s0.x; * args[1] = s1.x; */ - LLVMValueRef args[18]; + LLVMValueRef args[20]; /** * Number of arguments in the args array. */ unsigned arg_count; /** * The type output type of the opcode. 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
index d30f9da539e..dbe614d57c5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
@@ -43,21 +43,21 @@ struct lp_build_emit_data {
     * order of the arguments should be as follows:
     * SOA: s0.x, s0.y, s0.z, s0.w, s1.x, s1.y, s1.z, s1.w, s2.x, s2.y, s2.x, s2.w
     * AOS: s0.xyzw, s1.xyzw, s2.xyzw
     * TEXTURE Instructions: coord.xyzw
     *
     * Arguments should be packed into the args array. For example an SOA
     * instructions that reads s0.x and s1.x args should look like this:
     * args[0] = s0.x;
     * args[1] = s1.x;
     */
-   LLVMValueRef args[18];
+   LLVMValueRef args[20];
 
    /**
     * Number of arguments in the args array.
    */
    unsigned arg_count;
 
    /**
    * The type output type of the opcode. This should be set in the
    * lp_build_tgsi_action::fetch_args function.
    */
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 1c653839aea..7eabcbabfdb 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -84,30 +84,20 @@ shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
 		index = si_get_indirect_index(ctx, &reg->Indirect, 1,
 					      reg->Register.Index);
 	}
 
 	if (ubo)
 		return ctx->abi.load_ubo(&ctx->abi, index);
 	else
 		return ctx->abi.load_ssbo(&ctx->abi, index, false);
 }
 
-static bool tgsi_is_array_image(unsigned target)
-{
-	return target == TGSI_TEXTURE_3D ||
-	       target == TGSI_TEXTURE_CUBE ||
-	       target == TGSI_TEXTURE_1D_ARRAY ||
-	       target == TGSI_TEXTURE_2D_ARRAY ||
-	       target == TGSI_TEXTURE_CUBE_ARRAY ||
-	       target == TGSI_TEXTURE_2D_ARRAY_MSAA;
-}
-
 static enum ac_image_dim
 ac_texture_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target)
 {
 	switch (target) {
 	case TGSI_TEXTURE_1D:
 	case TGSI_TEXTURE_SHADOW1D:
 		if (screen->info.chip_class >= GFX9)
 			return ac_image_2d;
 		return ac_image_1d;
 	case TGSI_TEXTURE_2D:
@@ -132,20 +122,42 @@ ac_texture_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type
 		return ac_image_2darray;
 	case TGSI_TEXTURE_2D_MSAA:
 		return ac_image_2dmsaa;
 	case TGSI_TEXTURE_2D_ARRAY_MSAA:
 		return ac_image_2darraymsaa;
 	default:
 		unreachable("unhandled texture type");
 	}
 }
 
+static enum ac_image_dim
+ac_image_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target)
+{
+	enum ac_image_dim dim = ac_texture_dim_from_tgsi_target(screen, target);
+
+	/* Match the resource type set in the descriptor. */
+	if (dim == ac_image_cube ||
+	    (screen->info.chip_class <= VI && dim == ac_image_3d))
+		dim = ac_image_2darray;
+	else if (target == TGSI_TEXTURE_2D && screen->info.chip_class >= GFX9) {
+		/* When a single layer of a 3D texture is bound, the shader
+		 * will refer to a 2D target, but the descriptor has a 3D type.
+		 * Since the HW ignores BASE_ARRAY in this case, we need to
+		 * send 3 coordinates. This doesn't hurt when the underlying
+		 * texture is non-3D.
+		 */
+		dim = ac_image_3d;
+	}
+
+	return dim;
+}
+
 /**
  * Given a 256-bit resource descriptor, force the DCC enable bit to off.
  *
  * At least on Tonga, executing image stores on images with DCC enabled and
  * non-trivial can eventually lead to lockups. This can occur when an
  * application binds an image as read-only but then uses a shader that writes
  * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
  * program termination) in this case, but it doesn't cost much to be a bit
  * nicer: disabling DCC in the shader still leads to undefined results but
  * avoids the lockup.
@@ -248,114 +260,62 @@ image_fetch_rsrc(
 		 */
 		index = LLVMBuildMul(ctx->ac.builder, index,
 				     LLVMConstInt(ctx->i32, 2, 0), "");
 	}
 
 	*rsrc = si_load_image_desc(ctx, rsrc_ptr, index,
 				   target == TGSI_TEXTURE_BUFFER ?
 				   AC_DESC_BUFFER : AC_DESC_IMAGE, dcc_off);
 }
 
-static LLVMValueRef image_fetch_coords(
+static void image_fetch_coords(
 		struct lp_build_tgsi_context *bld_base,
 		const struct tgsi_full_instruction *inst,
-		unsigned src, LLVMValueRef desc)
+		unsigned src, LLVMValueRef desc,
+		LLVMValueRef *coords)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMBuilderRef builder = ctx->ac.builder;
 	unsigned target = inst->Memory.Texture;
-	unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
-	LLVMValueRef coords[4];
+	const unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
 	LLVMValueRef tmp;
 	int chan;
 
 	for (chan = 0; chan < num_coords; ++chan) {
 		tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
 		tmp = ac_to_integer(&ctx->ac, tmp);
 		coords[chan] = tmp;
 	}
 
 	if (ctx->screen->info.chip_class >= GFX9) {
 		/* 1D textures are allocated and used as 2D on GFX9. */
 		if (target == TGSI_TEXTURE_1D) {
 			coords[1] = ctx->i32_0;
-			num_coords++;
 		} else if (target == TGSI_TEXTURE_1D_ARRAY) {
 			coords[2] = coords[1];
 			coords[1] = ctx->i32_0;
-			num_coords++;
 		} else if (target == TGSI_TEXTURE_2D) {
 			/* The hw can't bind a slice of a 3D image as a 2D
 			 * image, because it ignores BASE_ARRAY if the target
 			 * is 3D. The workaround is to read BASE_ARRAY and set
 			 * it as the 3rd address operand for all 2D images.
 			 */
 			LLVMValueRef first_layer, const5, mask;
 
 			const5 = LLVMConstInt(ctx->i32, 5, 0);
 			mask = LLVMConstInt(ctx->i32, S_008F24_BASE_ARRAY(~0), 0);
 			first_layer = LLVMBuildExtractElement(builder, desc, const5, "");
 			first_layer = LLVMBuildAnd(builder, first_layer, mask, "");
 
 			coords[2] = first_layer;
-			num_coords++;
-		}
-	}
-
-	if (num_coords == 1)
-		return coords[0];
-
-	if (num_coords == 3) {
-		/* LLVM has difficulties lowering 3-element vectors. */
-		coords[3] = bld_base->uint_bld.undef;
-		num_coords = 4;
-	}
-
-	return lp_build_gather_values(&ctx->gallivm, coords, num_coords);
-}
-
-/**
- * Append the extra mode bits that are used by image load and store.
- */
-static void image_append_args(
-		struct si_shader_context *ctx,
-		struct lp_build_emit_data * emit_data,
-		unsigned target,
-		bool atomic,
-		bool force_glc)
-{
-	const struct tgsi_full_instruction *inst = emit_data->inst;
-	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
-	LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
-	LLVMValueRef r128 = i1false;
-	LLVMValueRef da = tgsi_is_array_image(target) ? i1true : i1false;
-	LLVMValueRef glc =
-		force_glc ||
-		inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
-		i1true : i1false;
-	LLVMValueRef slc = i1false;
-	LLVMValueRef lwe = i1false;
-
-	if (atomic) {
-		emit_data->args[emit_data->arg_count++] = r128;
-		emit_data->args[emit_data->arg_count++] = da;
-		if (!atomic) {
-			emit_data->args[emit_data->arg_count++] = glc;
 		}
-		emit_data->args[emit_data->arg_count++] = slc;
-		return;
 	}
-
-	emit_data->args[emit_data->arg_count++] = glc;
-	emit_data->args[emit_data->arg_count++] = slc;
-	emit_data->args[emit_data->arg_count++] = lwe;
-	emit_data->args[emit_data->arg_count++] = da;
 }
 
 /**
  * Append the resource and indexing arguments for buffer intrinsics.
  *
  * \param rsrc the v4i32 buffer resource
  * \param index index into the buffer (stride-based)
  * \param offset byte offset into the buffer
  */
 static void buffer_append_args(
@@ -402,35 +362,28 @@ static void load_fetch_args(
 		bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF;
 		rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo);
 
 		tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
 		offset = ac_to_integer(&ctx->ac, tmp);
 
 		buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
 				   offset, false, false);
 	} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
 		   tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
-		LLVMValueRef coords;
-
 		image_fetch_rsrc(bld_base, &inst->Src[0], false, target, &rsrc);
-		coords = image_fetch_coords(bld_base, inst, 1, rsrc);
+		image_fetch_coords(bld_base, inst, 1, rsrc, &emit_data->args[1]);
 
 		if (target == TGSI_TEXTURE_BUFFER) {
-			buffer_append_args(ctx, emit_data, rsrc, coords,
+			buffer_append_args(ctx, emit_data, rsrc, emit_data->args[1],
 					   ctx->i32_0, false, false);
 		} else {
-			emit_data->args[0] = coords;
-			emit_data->args[1] = rsrc;
-			emit_data->args[2] = LLVMConstInt(ctx->i32, 15, 0); /* dmask */
-			emit_data->arg_count = 3;
-
-			image_append_args(ctx, emit_data, target, false, false);
+			emit_data->args[0] = rsrc;
 		}
 	}
 }
 
 static void load_emit_buffer(struct si_shader_context *ctx,
 			     struct lp_build_emit_data *emit_data,
 			     bool can_speculate, bool allow_smem)
 {
 	const struct tgsi_full_instruction *inst = emit_data->inst;
 	uint writemask = inst->Dst[0].Register.WriteMask;
@@ -565,24 +518,22 @@ static bool is_oneway_access_only(const struct tgsi_full_instruction *inst,
 	}
 	return false;
 }
 
 static void load_emit(
 	const struct lp_build_tgsi_action *action,
 	struct lp_build_tgsi_context *bld_base,
 	struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
-	LLVMBuilderRef builder = ctx->ac.builder;
 	const struct tgsi_full_instruction * inst = emit_data->inst;
 	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
-	char intrinsic_name[64];
 	bool can_speculate = false;
 
 	if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
 		load_emit_memory(ctx, emit_data);
 		return;
 	}
 
 	if (inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
 		load_emit_buffer(ctx, emit_data, true, true);
 		return;
@@ -609,31 +560,32 @@ static void load_emit(
 			ac_build_buffer_load_format(&ctx->ac,
 						    emit_data->args[0],
 						    emit_data->args[1],
 						    emit_data->args[2],
 						    num_channels,
 						    LLVMConstIntGetZExtValue(emit_data->args[3]),
 						    can_speculate);
 		emit_data->output[emit_data->chan] =
 			ac_build_expand_to_vec4(&ctx->ac, result, num_channels);
 	} else {
-		ac_get_image_intr_name("llvm.amdgcn.image.load",
-				       emit_data->dst_type, /* vdata */
-				       LLVMTypeOf(emit_data->args[0]), /* coords */
-				       LLVMTypeOf(emit_data->args[1]), /* rsrc */
-				       intrinsic_name, sizeof(intrinsic_name));
+		struct ac_image_args args = {};
+		args.opcode = ac_image_load;
+		args.resource = emit_data->args[0];
+		memcpy(args.coords, &emit_data->args[1], sizeof(args.coords));
+		args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
+		if (inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))
+			args.cache_policy = ac_glc;
+		args.attributes = ac_get_load_intr_attribs(can_speculate);
+		args.dmask = 0xf;
 
 		emit_data->output[emit_data->chan] =
-			lp_build_intrinsic(
-				builder, intrinsic_name, emit_data->dst_type,
-				emit_data->args, emit_data->arg_count,
-				ac_get_load_intr_attribs(can_speculate));
+			ac_build_image_opcode(&ctx->ac, &args);
 	}
 }
 
 static void store_fetch_args(
 		struct lp_build_tgsi_context * bld_base,
 		struct lp_build_emit_data * emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	const struct tgsi_full_instruction * inst = emit_data->inst;
 	struct tgsi_full_src_register memory;
@@ -660,43 +612,37 @@ static void store_fetch_args(
 	rsrc = shader_buffer_fetch_rsrc(ctx, &memory, false);
 
 	tmp = lp_build_emit_fetch(bld_base, inst, 0, 0);
 	offset = ac_to_integer(&ctx->ac, tmp);
 
 	buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
 			   offset, false, false);
 	} else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE ||
 		   tgsi_is_bindless_image_file(inst->Dst[0].Register.File)) {
 		unsigned target = inst->Memory.Texture;
-		LLVMValueRef coords;
 
 		/* 8bit/16bit TC L1 write corruption bug on SI.
 		 * All store opcodes not aligned to a dword are affected.
 		 *
 		 * The only way to get unaligned stores in radeonsi is through
 		 * shader images.
 		 */
 		bool force_glc = ctx->screen->info.chip_class == SI;
 
 		image_fetch_rsrc(bld_base, &memory, true, target, &rsrc);
-		coords = image_fetch_coords(bld_base, inst, 0, rsrc);
+		image_fetch_coords(bld_base, inst, 0, rsrc, &emit_data->args[2]);
 
 		if (target == TGSI_TEXTURE_BUFFER) {
-			buffer_append_args(ctx, emit_data, rsrc, coords,
+			buffer_append_args(ctx, emit_data, rsrc, emit_data->args[2],
 					   ctx->i32_0, false, force_glc);
 		} else {
-			emit_data->args[1] = coords;
-			emit_data->args[2] = rsrc;
-			emit_data->args[3] = LLVMConstInt(ctx->i32, 15, 0); /* dmask */
-			emit_data->arg_count = 4;
-
-			image_append_args(ctx, emit_data, target, false, force_glc);
+			emit_data->args[1] = rsrc;
 		}
 	}
 }
 
 static void store_emit_buffer(
 		struct si_shader_context *ctx,
 		struct lp_build_emit_data *emit_data,
 		bool writeonly_memory)
 {
 	const struct tgsi_full_instruction *inst = emit_data->inst;
 	uint writemask = inst->Dst[0].Register.WriteMask;
@@ -792,21 +738,20 @@ static void store_emit_memory(
 
 static void store_emit(
 	const struct lp_build_tgsi_action *action,
 	struct lp_build_tgsi_context *bld_base,
 	struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMBuilderRef builder = ctx->ac.builder;
 	const struct tgsi_full_instruction * inst = emit_data->inst;
 	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
 	unsigned target = inst->Memory.Texture;
-	char intrinsic_name[64];
 	bool writeonly_memory = false;
 
 	if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
 		store_emit_memory(ctx, emit_data);
 		return;
 	}
 
 	if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
 		ac_build_waitcnt(&ctx->ac, VM_CNT);
@@ -821,31 +766,39 @@ static void store_emit(
 		return;
 	}
 
 	if (target == TGSI_TEXTURE_BUFFER) {
 		emit_data->output[emit_data->chan] = lp_build_intrinsic(
 			builder, "llvm.amdgcn.buffer.store.format.v4f32",
 			emit_data->dst_type, emit_data->args,
 			emit_data->arg_count,
 			ac_get_store_intr_attribs(writeonly_memory));
 	} else {
-		ac_get_image_intr_name("llvm.amdgcn.image.store",
-				       LLVMTypeOf(emit_data->args[0]), /* vdata */
-				       LLVMTypeOf(emit_data->args[1]), /* coords */
-				       LLVMTypeOf(emit_data->args[2]), /* rsrc */
-				       intrinsic_name, sizeof(intrinsic_name));
+		struct ac_image_args args = {};
+		args.opcode = ac_image_store;
+		args.data[0] = emit_data->args[0];
+		args.resource = emit_data->args[1];
+		memcpy(args.coords, &emit_data->args[2], sizeof(args.coords));
+		args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
+		args.attributes = ac_get_store_intr_attribs(writeonly_memory);
+		args.dmask = 0xf;
+
+		/* Workaround for 8bit/16bit TC L1 write corruption bug on SI.
+		 * All store opcodes not aligned to a dword are affected.
+		 */
+		bool force_glc = ctx->screen->info.chip_class == SI;
+		if (force_glc ||
+		    inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))
+			args.cache_policy = ac_glc;
 
 		emit_data->output[emit_data->chan] =
-			lp_build_intrinsic(
-				builder, intrinsic_name, emit_data->dst_type,
-				emit_data->args, emit_data->arg_count,
-				ac_get_store_intr_attribs(writeonly_memory));
+			ac_build_image_opcode(&ctx->ac, &args);
 	}
 }
 
 static void atomic_fetch_args(
 		struct lp_build_tgsi_context * bld_base,
 		struct lp_build_emit_data * emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	const struct tgsi_full_instruction * inst = emit_data->inst;
 	LLVMValueRef data1, data2;
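Sketch, not patch content: the store path mirrors the load path, with the
source value in data[0] and the SI GLC workaround folded into cache_policy.
The wrapper and its parameters are placeholders:

/* Sketch only: image store via the unified helper, including the SI
 * TC L1 write-corruption workaround. */
static void
emit_image_store_sketch(struct si_shader_context *ctx, LLVMValueRef value,
			LLVMValueRef rsrc, LLVMValueRef coords[4],
			enum ac_image_dim dim, bool coherent,
			bool writeonly_memory)
{
	struct ac_image_args args = {};

	args.opcode = ac_image_store;
	args.data[0] = value;		/* v4f32 data to write */
	args.resource = rsrc;
	memcpy(args.coords, coords, sizeof(args.coords));
	args.dim = dim;
	args.dmask = 0xf;
	args.attributes = ac_get_store_intr_attribs(writeonly_memory);

	/* SI suffers 8/16-bit TC L1 write corruption for stores not
	 * aligned to a dword, so GLC is forced there. */
	if (ctx->screen->info.chip_class == SI || coherent)
		args.cache_policy = ac_glc;

	ac_build_image_opcode(&ctx->ac, &args);
}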
@@ -875,33 +828,31 @@ static void atomic_fetch_args(
 		rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);
 
 		tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
 		offset = ac_to_integer(&ctx->ac, tmp);
 
 		buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
 				   offset, true, false);
 	} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
 		   tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
 		unsigned target = inst->Memory.Texture;
-		LLVMValueRef coords;
 
 		image_fetch_rsrc(bld_base, &inst->Src[0], true, target, &rsrc);
-		coords = image_fetch_coords(bld_base, inst, 1, rsrc);
+		image_fetch_coords(bld_base, inst, 1, rsrc,
+				   &emit_data->args[emit_data->arg_count + 1]);
 
 		if (target == TGSI_TEXTURE_BUFFER) {
-			buffer_append_args(ctx, emit_data, rsrc, coords,
+			buffer_append_args(ctx, emit_data, rsrc,
+					   emit_data->args[emit_data->arg_count + 1],
 					   ctx->i32_0, true, false);
 		} else {
-			emit_data->args[emit_data->arg_count++] = coords;
-			emit_data->args[emit_data->arg_count++] = rsrc;
-
-			image_append_args(ctx, emit_data, target, true, false);
+			emit_data->args[emit_data->arg_count] = rsrc;
 		}
 	}
 }
 
 static void atomic_emit_memory(struct si_shader_context *ctx,
 			       struct lp_build_emit_data *emit_data)
 {
 	LLVMBuilderRef builder = ctx->ac.builder;
 	const struct tgsi_full_instruction * inst = emit_data->inst;
 	LLVMValueRef ptr, result, arg;
@@ -966,51 +917,68 @@ static void atomic_emit_memory(struct si_shader_context *ctx,
 }
 
 static void atomic_emit(
 	const struct lp_build_tgsi_action *action,
 	struct lp_build_tgsi_context *bld_base,
 	struct lp_build_emit_data *emit_data)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	LLVMBuilderRef builder = ctx->ac.builder;
 	const struct tgsi_full_instruction * inst = emit_data->inst;
-	char intrinsic_name[40];
 	LLVMValueRef tmp;
 
 	if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
 		atomic_emit_memory(ctx, emit_data);
 		return;
 	}
 
 	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
 	    inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
+		char intrinsic_name[40];
 		snprintf(intrinsic_name, sizeof(intrinsic_name),
 			 "llvm.amdgcn.buffer.atomic.%s", action->intr_name);
+		tmp = lp_build_intrinsic(
+			builder, intrinsic_name, ctx->i32,
+			emit_data->args, emit_data->arg_count, 0);
+		emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp);
 	} else {
-		LLVMValueRef coords;
-		char coords_type[8];
+		unsigned num_data = inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS ?
+			2 : 1;
+		struct ac_image_args args = {};
 
-		if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
-			coords = emit_data->args[2];
-		else
-			coords = emit_data->args[1];
+		if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
+			args.opcode = ac_image_atomic_cmpswap;
+		} else {
+			args.opcode = ac_image_atomic;
+			switch (inst->Instruction.Opcode) {
+			case TGSI_OPCODE_ATOMXCHG: args.atomic = ac_atomic_swap; break;
+			case TGSI_OPCODE_ATOMUADD: args.atomic = ac_atomic_add; break;
+			case TGSI_OPCODE_ATOMAND: args.atomic = ac_atomic_and; break;
+			case TGSI_OPCODE_ATOMOR: args.atomic = ac_atomic_or; break;
+			case TGSI_OPCODE_ATOMXOR: args.atomic = ac_atomic_xor; break;
+			case TGSI_OPCODE_ATOMUMIN: args.atomic = ac_atomic_umin; break;
+			case TGSI_OPCODE_ATOMUMAX: args.atomic = ac_atomic_umax; break;
+			case TGSI_OPCODE_ATOMIMIN: args.atomic = ac_atomic_smin; break;
+			case TGSI_OPCODE_ATOMIMAX: args.atomic = ac_atomic_smax; break;
+			default: unreachable("unhandled image atomic");
+			}
+		}
 
-		ac_build_type_name_for_intr(LLVMTypeOf(coords), coords_type, sizeof(coords_type));
-		snprintf(intrinsic_name, sizeof(intrinsic_name),
-			 "llvm.amdgcn.image.atomic.%s.%s",
-			 action->intr_name, coords_type);
-	}
+		for (unsigned i = 0; i < num_data; ++i)
+			args.data[i] = emit_data->args[i];
 
-	tmp = lp_build_intrinsic(
-		builder, intrinsic_name, ctx->i32,
-		emit_data->args, emit_data->arg_count, 0);
-	emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp);
+		args.resource = emit_data->args[num_data];
+		memcpy(args.coords, &emit_data->args[num_data + 1], sizeof(args.coords));
+		args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
+
+		emit_data->output[emit_data->chan] =
+			ac_to_float(&ctx->ac, ac_build_image_opcode(&ctx->ac, &args));
+	}
 }
 
 static void set_tex_fetch_args(struct si_shader_context *ctx,
 			       struct lp_build_emit_data *emit_data,
 			       struct ac_image_args *args,
 			       unsigned target)
 {
 	args->dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
 	args->unorm = target == TGSI_TEXTURE_RECT ||
 		      target == TGSI_TEXTURE_SHADOWRECT;
@@ -1809,22 +1777,22 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 		assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN);
 
 		if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT ||
 		    inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT) {
 			gather4_int_result_workaround =
 				si_lower_gather4_integer(ctx, &args, target,
 							 inst->Texture.ReturnType);
 		}
 	}
 
-	LLVMValueRef result =
-		ac_build_image_opcode(&ctx->ac, &args);
+	args.attributes = AC_FUNC_ATTR_READNONE;
+	LLVMValueRef result = ac_build_image_opcode(&ctx->ac, &args);
 
 	if (gather4_int_result_workaround) {
 		result = si_fix_gather4_integer_result(ctx, result,
 						       inst->Texture.ReturnType,
 						       gather4_int_result_workaround);
 	}
 
 	emit_data->output[emit_data->chan] = result;
 }
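A final reviewer sketch, not patch content: putting the radeonsi side together,
a TGSI ATOMUADD on an image reduces to the following; value/rsrc/coords/dim are
placeholders for caller state:

/* Sketch only: image atomic add through the unified helper. */
static LLVMValueRef
emit_image_atomic_add_sketch(struct ac_llvm_context *ac, LLVMValueRef value,
			     LLVMValueRef rsrc, LLVMValueRef coords[4],
			     enum ac_image_dim dim)
{
	struct ac_image_args args = {};

	args.opcode = ac_image_atomic;
	args.atomic = ac_atomic_add;	/* from TGSI_OPCODE_ATOMUADD */
	args.data[0] = value;		/* i32 source operand */
	args.resource = rsrc;
	memcpy(args.coords, coords, sizeof(args.coords));
	args.dim = dim;

	/* Atomics return the pre-op value as i32; atomic_emit bitcasts
	 * it back to float for the TGSI register file. */
	return ac_build_image_opcode(ac, &args);
}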
-- 
2.14.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev