From: Marek Olšák <[email protected]>

---
 src/amd/common/ac_llvm_build.c                      |  2 +-
 src/gallium/drivers/radeonsi/si_shader.c            | 18 ++++++++----------
 src/gallium/drivers/radeonsi/si_shader_internal.h   |  1 -
 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c |  1 -
 4 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 209dfdd..171016b 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -752,21 +752,21 @@ LLVMValueRef ac_build_buffer_load_format(struct 
ac_llvm_context *ctx,
                                          ctx->v4f32, args, ARRAY_SIZE(args),
                                          /* READNONE means writes can't
                                           * affect it, while READONLY means
                                           * that writes can affect it. */
                                          readonly_memory && HAVE_LLVM >= 
0x0400 ?
                                                  AC_FUNC_ATTR_READNONE :
                                                  AC_FUNC_ATTR_READONLY);
        }
 
        LLVMValueRef args[] = {
-               rsrc,
+               LLVMBuildBitCast(ctx->builder, rsrc, ctx->v16i8, ""),
                voffset,
                vindex,
        };
        return ac_build_intrinsic(ctx, "llvm.SI.vs.load.input",
                                  ctx->v4f32, args, 3,
                                  AC_FUNC_ATTR_READNONE |
                                  AC_FUNC_ATTR_LEGACY);
 }
 
 /**
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 77dd6b1..3ac1ef4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1378,21 +1378,21 @@ static LLVMValueRef get_sample_id(struct 
si_shader_context *ctx)
 /**
  * Load a dword from a constant buffer.
  */
 static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
                                      LLVMValueRef resource,
                                      LLVMValueRef offset)
 {
        LLVMBuilderRef builder = ctx->gallivm.builder;
        LLVMValueRef args[2] = {resource, offset};
 
-       return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, 
args, 2,
+       return lp_build_intrinsic(builder, "llvm.SI.load.const.v4i32", 
ctx->f32, args, 2,
                                  LP_FUNC_ATTR_READNONE |
                                  LP_FUNC_ATTR_LEGACY);
 }
 
 static LLVMValueRef load_sample_position(struct si_shader_context *ctx, 
LLVMValueRef sample_id)
 {
        struct lp_build_context *uint_bld = &ctx->bld_base.uint_bld;
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        LLVMValueRef desc = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
@@ -4666,22 +4666,21 @@ static void tex_fetch_args(
        unsigned chan;
        unsigned num_deriv_channels = 0;
        bool has_offset = inst->Texture.NumOffsets > 0;
        LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
        unsigned dmask = 0xf;
 
        tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
 
        if (target == TGSI_TEXTURE_BUFFER) {
                emit_data->dst_type = ctx->v4f32;
-               emit_data->args[0] = LLVMBuildBitCast(gallivm->builder, res_ptr,
-                                                     ctx->v16i8, "");
+               emit_data->args[0] = res_ptr;
                emit_data->args[1] = ctx->i32_0;
                emit_data->args[2] = lp_build_emit_fetch(bld_base, 
emit_data->inst, 0, TGSI_CHAN_X);
                emit_data->arg_count = 3;
                return;
        }
 
        /* Fetch and project texture coordinates */
        coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 
TGSI_CHAN_W);
        for (chan = 0; chan < 3; chan++ ) {
                coords[chan] = lp_build_emit_fetch(bld_base,
@@ -5835,48 +5834,48 @@ static unsigned si_get_max_workgroup_size(struct 
si_shader *shader)
                max_work_group_size = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
        }
        return max_work_group_size;
 }
 
 static void declare_per_stage_desc_pointers(struct si_shader_context *ctx,
                                            LLVMTypeRef *params,
                                            unsigned *num_params,
                                            bool assign_params)
 {
-       params[(*num_params)++] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
+       params[(*num_params)++] = const_array(ctx->v4i32, SI_NUM_CONST_BUFFERS);
        params[(*num_params)++] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
        params[(*num_params)++] = const_array(ctx->v8i32, SI_NUM_IMAGES);
        params[(*num_params)++] = const_array(ctx->v4i32, 
SI_NUM_SHADER_BUFFERS);
 
        if (assign_params) {
                ctx->param_const_buffers  = *num_params - 4;
                ctx->param_samplers       = *num_params - 3;
                ctx->param_images         = *num_params - 2;
                ctx->param_shader_buffers = *num_params - 1;
        }
 }
 
 static void declare_default_desc_pointers(struct si_shader_context *ctx,
                                          LLVMTypeRef *params,
                                          unsigned *num_params)
 {
        params[ctx->param_rw_buffers = (*num_params)++] =
-               const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
+               const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
        declare_per_stage_desc_pointers(ctx, params, num_params, true);
 }
 
 static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx,
                                            LLVMTypeRef *params,
                                            unsigned *num_params)
 {
        params[ctx->param_vertex_buffers = (*num_params)++] =
-               const_array(ctx->v16i8, SI_NUM_VERTEX_BUFFERS);
+               const_array(ctx->v4i32, SI_NUM_VERTEX_BUFFERS);
        params[ctx->param_base_vertex = (*num_params)++] = ctx->i32;
        params[ctx->param_start_instance = (*num_params)++] = ctx->i32;
        params[ctx->param_draw_id = (*num_params)++] = ctx->i32;
        params[ctx->param_vs_state_bits = (*num_params)++] = ctx->i32;
 }
 
 static void declare_vs_input_vgprs(struct si_shader_context *ctx,
                                   LLVMTypeRef *params, unsigned *num_params,
                                   unsigned *num_prolog_vgprs)
 {
@@ -5984,21 +5983,21 @@ static void create_function(struct si_shader_context 
*ctx)
                 */
                for (i = 0; i < GFX6_TCS_NUM_USER_SGPR + 2; i++)
                        returns[num_returns++] = ctx->i32; /* SGPRs */
                for (i = 0; i < 3; i++)
                        returns[num_returns++] = ctx->f32; /* VGPRs */
                break;
 
        case SI_SHADER_MERGED_VERTEX_TESSCTRL:
                /* Merged stages have 8 system SGPRs at the beginning. */
                params[ctx->param_rw_buffers = num_params++] = /* 
SPI_SHADER_USER_DATA_ADDR_LO_HS */
-                       const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
+                       const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
                params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
                params[ctx->param_merged_wave_info = num_params++] = ctx->i32;
                params[ctx->param_tcs_factor_offset = num_params++] = ctx->i32;
                params[ctx->param_merged_scratch_offset = num_params++] = 
ctx->i32;
                params[num_params++] = ctx->i32; /* unused */
                params[num_params++] = ctx->i32; /* unused */
 
                params[num_params++] = ctx->i32; /* unused */
                params[num_params++] = ctx->i32; /* unused */
                declare_per_stage_desc_pointers(ctx, params, &num_params,
@@ -6039,21 +6038,21 @@ static void create_function(struct si_shader_context 
*ctx)
                        for (i = 0; i <= 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K; 
i++)
                                returns[num_returns++] = ctx->i32; /* SGPRs */
                        for (i = 0; i < 3; i++)
                                returns[num_returns++] = ctx->f32; /* VGPRs */
                }
                break;
 
        case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY:
                /* Merged stages have 8 system SGPRs at the beginning. */
                params[ctx->param_rw_buffers = num_params++] = /* 
SPI_SHADER_USER_DATA_ADDR_LO_GS */
-                       const_array(ctx->v16i8, SI_NUM_RW_BUFFERS);
+                       const_array(ctx->v4i32, SI_NUM_RW_BUFFERS);
                params[ctx->param_gs2vs_offset = num_params++] = ctx->i32;
                params[ctx->param_merged_wave_info = num_params++] = ctx->i32;
                params[ctx->param_tcs_offchip_offset = num_params++] = ctx->i32;
                params[ctx->param_merged_scratch_offset = num_params++] = 
ctx->i32;
                params[num_params++] = ctx->i32; /* unused 
(SPI_SHADER_PGM_LO/HI_GS << 8) */
                params[num_params++] = ctx->i32; /* unused 
(SPI_SHADER_PGM_LO/HI_GS >> 24) */
 
                params[num_params++] = ctx->i32; /* unused */
                params[num_params++] = ctx->i32; /* unused */
                declare_per_stage_desc_pointers(ctx, params, &num_params,
@@ -6358,21 +6357,20 @@ static void preload_ring_buffers(struct 
si_shader_context *ctx)
                                             
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                                             
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
                                             
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
                                             
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                                             
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
                                             S_008F0C_ELEMENT_SIZE(1) | /* 
element_size = 4 (bytes) */
                                             S_008F0C_INDEX_STRIDE(1) | /* 
index_stride = 16 (elements) */
                                             S_008F0C_ADD_TID_ENABLE(1),
                                             0),
                                LLVMConstInt(ctx->i32, 3, 0), "");
-                       ring = LLVMBuildBitCast(builder, ring, ctx->v16i8, "");
 
                        ctx->gsvs_ring[stream] = ring;
                }
        }
 }
 
 static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
                                         LLVMValueRef param_rw_buffers,
                                         unsigned param_pos_fixed_pt)
 {
@@ -8691,21 +8689,21 @@ static void si_build_ps_prolog_function(struct 
si_shader_context *ctx,
                unsigned pos = key->ps_prolog.num_input_sgprs +
                               key->ps_prolog.num_input_vgprs - 1;
                LLVMValueRef ptr[2], list;
 
                /* Get the pointer to rw buffers. */
                ptr[0] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS);
                ptr[1] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS_HI);
                list = lp_build_gather_values(gallivm, ptr, 2);
                list = LLVMBuildBitCast(gallivm->builder, list, ctx->i64, "");
                list = LLVMBuildIntToPtr(gallivm->builder, list,
-                                         const_array(ctx->v16i8, 
SI_NUM_RW_BUFFERS), "");
+                                         const_array(ctx->v4i32, 
SI_NUM_RW_BUFFERS), "");
 
                si_llvm_emit_polygon_stipple(ctx, list, pos);
        }
 
        if (key->ps_prolog.states.bc_optimize_for_persp ||
            key->ps_prolog.states.bc_optimize_for_linear) {
                unsigned i, base = key->ps_prolog.num_input_sgprs;
                LLVMValueRef center[2], centroid[2], tmp, bc_optimize;
 
                /* The shader should do: if (PRIM_MASK[31]) CENTROID = CENTER;
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h 
b/src/gallium/drivers/radeonsi/si_shader_internal.h
index cad2db3..03bf83d 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -215,21 +215,20 @@ struct si_shader_context {
        LLVMValueRef gs_next_vertex[4];
        LLVMValueRef return_value;
 
        LLVMTypeRef voidt;
        LLVMTypeRef i1;
        LLVMTypeRef i8;
        LLVMTypeRef i32;
        LLVMTypeRef i64;
        LLVMTypeRef i128;
        LLVMTypeRef f32;
-       LLVMTypeRef v16i8;
        LLVMTypeRef v2i32;
        LLVMTypeRef v4i32;
        LLVMTypeRef v4f32;
        LLVMTypeRef v8i32;
 
        LLVMValueRef i32_0;
        LLVMValueRef i32_1;
 
        LLVMValueRef shared_memory;
 };
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index c733f5a..66b1916 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -1334,21 +1334,20 @@ void si_llvm_context_init(struct si_shader_context *ctx,
 
        si_shader_context_init_alu(&ctx->bld_base);
 
        ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
        ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
        ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
        ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
        ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
        ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
        ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
-       ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
        ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
        ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
        ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
        ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
 
        ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
        ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
 }
 
 /* Set the context to a certain TGSI shader. Can be called repeatedly
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to