On Mon, Jan 8, 2018 at 10:31 PM, Samuel Pitoiset
<samuel.pitoi...@gmail.com> wrote:
>
>
> On 01/06/2018 12:12 PM, Marek Olšák wrote:
>>
>> From: Marek Olšák <marek.ol...@amd.com>
>>
>> SGPRS: 2170102 -> 2158430 (-0.54 %)
>> VGPRS: 1645656 -> 1641516 (-0.25 %)
>> Spilled SGPRs: 9078 -> 8810 (-2.95 %)
>> Spilled VGPRs: 130 -> 114 (-12.31 %)
>> Scratch size: 1508 -> 1492 (-1.06 %) dwords per thread
>> Code Size: 52094872 -> 52692540 (1.15 %) bytes
>
>
> These numbers are quite nice, great work! I think it's something I would
> like to implement for RADV.
>
> Just one minor nitpick below.
>
> Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
>
>
>> ---
>>   src/amd/common/ac_llvm_build.c                    |  13 +++
>>   src/amd/common/ac_llvm_build.h                    |   5 +
>>   src/gallium/drivers/radeonsi/si_descriptors.c     |  10 +-
>>   src/gallium/drivers/radeonsi/si_shader.c          | 115
>> +++++++++++++---------
>>   src/gallium/drivers/radeonsi/si_shader.h          |  23 ++++-
>>   src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c |   6 +-
>>   6 files changed, 122 insertions(+), 50 deletions(-)
>>
>> diff --git a/src/amd/common/ac_llvm_build.c
>> b/src/amd/common/ac_llvm_build.c
>> index ed00d20..02d1b39 100644
>> --- a/src/amd/common/ac_llvm_build.c
>> +++ b/src/amd/common/ac_llvm_build.c
>> @@ -57,20 +57,21 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
>> LLVMContextRef context,
>>         ctx->context = context;
>>         ctx->module = NULL;
>>         ctx->builder = NULL;
>>         ctx->voidt = LLVMVoidTypeInContext(ctx->context);
>>         ctx->i1 = LLVMInt1TypeInContext(ctx->context);
>>         ctx->i8 = LLVMInt8TypeInContext(ctx->context);
>>         ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
>>         ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
>>         ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
>> +       ctx->intptr = HAVE_32BIT_POINTERS ? ctx->i32 : ctx->i64;
>>         ctx->f16 = LLVMHalfTypeInContext(ctx->context);
>>         ctx->f32 = LLVMFloatTypeInContext(ctx->context);
>>         ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
>>         ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
>>         ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
>>         ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
>>         ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
>>         ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
>>         ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
>>         ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
>> @@ -128,21 +129,24 @@ unsigned
>>   ac_get_type_size(LLVMTypeRef type)
>>   {
>>         LLVMTypeKind kind = LLVMGetTypeKind(type);
>>         switch (kind) {
>>         case LLVMIntegerTypeKind:
>>                 return LLVMGetIntTypeWidth(type) / 8;
>>         case LLVMFloatTypeKind:
>>                 return 4;
>>         case LLVMDoubleTypeKind:
>> +               return 8;
>>         case LLVMPointerTypeKind:
>> +               if (LLVMGetPointerAddressSpace(type) ==
>> AC_CONST_32BIT_ADDR_SPACE)
>> +                       return 4;
>>                 return 8;
>>         case LLVMVectorTypeKind:
>>                 return LLVMGetVectorSize(type) *
>>                        ac_get_type_size(LLVMGetElementType(type));
>>         case LLVMArrayTypeKind:
>>                 return LLVMGetArrayLength(type) *
>>                        ac_get_type_size(LLVMGetElementType(type));
>>         default:
>>                 assert(0);
>>                 return 0;
>> @@ -2035,10 +2039,19 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context
>> *ctx,
>>                                                            LLVMIntEQ,
>> src0,
>>                                                            ctx->i32_0,
>> ""),
>>                                LLVMConstInt(ctx->i32, -1, 0), lsb, "");
>>   }
>>     LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type)
>>   {
>>         return LLVMPointerType(LLVMArrayType(elem_type, 0),
>>                                AC_CONST_ADDR_SPACE);
>>   }
>> +
>> +LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type)
>> +{
>> +       if (!HAVE_32BIT_POINTERS)
>> +               return ac_array_in_const_addr_space(elem_type);
>> +
>> +       return LLVMPointerType(LLVMArrayType(elem_type, 0),
>> +                              AC_CONST_32BIT_ADDR_SPACE);
>> +}
>> diff --git a/src/amd/common/ac_llvm_build.h
>> b/src/amd/common/ac_llvm_build.h
>> index b1c4737..5235664 100644
>> --- a/src/amd/common/ac_llvm_build.h
>> +++ b/src/amd/common/ac_llvm_build.h
>> @@ -27,36 +27,40 @@
>>     #include <stdbool.h>
>>   #include <llvm-c/TargetMachine.h>
>>     #include "amd_family.h"
>>     #ifdef __cplusplus
>>   extern "C" {
>>   #endif
>>   +#define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0600)
>> +
>>   enum {
>>         AC_CONST_ADDR_SPACE = 2, /* CONST is the only address space that
>> selects SMEM loads */
>>         AC_LOCAL_ADDR_SPACE = 3,
>> +       AC_CONST_32BIT_ADDR_SPACE = 6, /* same as CONST, but the pointer
>> type has 32 bits */
>>   };
>>     struct ac_llvm_context {
>>         LLVMContextRef context;
>>         LLVMModuleRef module;
>>         LLVMBuilderRef builder;
>>         LLVMTypeRef voidt;
>>         LLVMTypeRef i1;
>>         LLVMTypeRef i8;
>>         LLVMTypeRef i16;
>>         LLVMTypeRef i32;
>>         LLVMTypeRef i64;
>> +       LLVMTypeRef intptr;
>>         LLVMTypeRef f16;
>>         LLVMTypeRef f32;
>>         LLVMTypeRef f64;
>>         LLVMTypeRef v2i16;
>>         LLVMTypeRef v2i32;
>>         LLVMTypeRef v3i32;
>>         LLVMTypeRef v4i32;
>>         LLVMTypeRef v2f32;
>>         LLVMTypeRef v4f32;
>>         LLVMTypeRef v8i32;
>> @@ -331,16 +335,17 @@ void ac_declare_lds_as_pointer(struct
>> ac_llvm_context *ac);
>>   LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
>>                          LLVMValueRef dw_addr);
>>   void ac_lds_store(struct ac_llvm_context *ctx,
>>                   LLVMValueRef dw_addr, LLVMValueRef value);
>>     LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
>>                          LLVMTypeRef dst_type,
>>                          LLVMValueRef src0);
>>     LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type);
>> +LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type);
>>     #ifdef __cplusplus
>>   }
>>   #endif
>>     #endif
>> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c
>> b/src/gallium/drivers/radeonsi/si_descriptors.c
>> index b372090..810169d 100644
>> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
>> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
>> @@ -1996,31 +1996,35 @@ void si_shader_change_notify(struct si_context
>> *sctx)
>>         } else {
>>                 si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0);
>>         }
>>   }
>>     static void si_emit_shader_pointer_head(struct radeon_winsys_cs *cs,
>>                                         struct si_descriptors *desc,
>>                                         unsigned sh_base,
>>                                         unsigned pointer_count)
>>   {
>> -       radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * 2, 0));
>> +       radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count *
>> (HAVE_32BIT_POINTERS ? 1 : 2), 0));
>>         radeon_emit(cs, (sh_base + desc->shader_userdata_offset -
>> SI_SH_REG_OFFSET) >> 2);
>>   }
>>     static void si_emit_shader_pointer_body(struct radeon_winsys_cs *cs,
>>                                         struct si_descriptors *desc)
>>   {
>>         uint64_t va = desc->gpu_address;
>>         radeon_emit(cs, va);
>> -       radeon_emit(cs, va >> 32);
>> +
>> +       if (HAVE_32BIT_POINTERS)
>> +               assert(va <= 0xffffffff);
>> +       else
>> +               radeon_emit(cs, va >> 32);
>>   }
>>     static void si_emit_shader_pointer(struct si_context *sctx,
>>                                    struct si_descriptors *desc,
>>                                    unsigned sh_base)
>>   {
>>         struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
>>         si_emit_shader_pointer_head(cs, desc, sh_base, 1);
>>         si_emit_shader_pointer_body(cs, desc);
>> @@ -2553,22 +2557,24 @@ void si_all_resident_buffers_begin_new_cs(struct
>> si_context *sctx)
>>         sctx->b.num_resident_handles += num_resident_tex_handles +
>>                                         num_resident_img_handles;
>>   }
>>     /* INIT/DEINIT/UPLOAD */
>>     void si_init_all_descriptors(struct si_context *sctx)
>>   {
>>         int i;
>>   +#if !HAVE_32BIT_POINTERS
>>         STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0);
>>         STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0);
>> +#endif
>>         for (i = 0; i < SI_NUM_SHADERS; i++) {
>>                 bool gfx9_tcs = false;
>>                 bool gfx9_gs = false;
>>                 unsigned num_sampler_slots = SI_NUM_IMAGES / 2 +
>> SI_NUM_SAMPLERS;
>>                 unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS +
>> SI_NUM_CONST_BUFFERS;
>>                 struct si_descriptors *desc;
>>                 if (sctx->b.chip_class >= GFX9) {
>>                         gfx9_tcs = i == PIPE_SHADER_TESS_CTRL;
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c
>> b/src/gallium/drivers/radeonsi/si_shader.c
>> index 760e742..57336ff 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>> @@ -3144,26 +3144,32 @@ si_insert_input_ret_float(struct si_shader_context
>> *ctx, LLVMValueRef ret,
>>   {
>>         LLVMBuilderRef builder = ctx->ac.builder;
>>         LLVMValueRef p = LLVMGetParam(ctx->main_fn, param);
>>         return LLVMBuildInsertValue(builder, ret,
>>                                     ac_to_float(&ctx->ac, p),
>>                                     return_index, "");
>>   }
>>     static LLVMValueRef
>> -si_insert_input_ptr_as_2xi32(struct si_shader_context *ctx, LLVMValueRef
>> ret,
>> -                            unsigned param, unsigned return_index)
>> +si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret,
>> +                   unsigned param, unsigned return_index)
>>   {
>>         LLVMBuilderRef builder = ctx->ac.builder;
>>         LLVMValueRef ptr, lo, hi;
>>   +     if (HAVE_32BIT_POINTERS) {
>> +               ptr = LLVMGetParam(ctx->main_fn, param);
>> +               ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, "");
>> +               return LLVMBuildInsertValue(builder, ret, ptr,
>> return_index, "");
>> +       }
>> +
>>         ptr = LLVMGetParam(ctx->main_fn, param);
>>         ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i64, "");
>>         ptr = LLVMBuildBitCast(builder, ptr, ctx->v2i32, "");
>>         lo = LLVMBuildExtractElement(builder, ptr, ctx->i32_0, "");
>>         hi = LLVMBuildExtractElement(builder, ptr, ctx->i32_1, "");
>>         ret = LLVMBuildInsertValue(builder, ret, lo, return_index, "");
>>         return LLVMBuildInsertValue(builder, ret, hi, return_index + 1,
>> "");
>>   }
>>     /* This only writes the tessellation factor levels. */
>> @@ -3265,75 +3271,76 @@ static void si_llvm_emit_tcs_epilogue(struct
>> ac_shader_abi *abi,
>>   /* Pass TCS inputs from LS to TCS on GFX9. */
>>   static void si_set_ls_return_value_for_tcs(struct si_shader_context
>> *ctx)
>>   {
>>         LLVMValueRef ret = ctx->return_value;
>>         ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset,
>> 2);
>>         ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info,
>> 3);
>>         ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset,
>> 4);
>>         ret = si_insert_input_ret(ctx, ret,
>> ctx->param_merged_scratch_offset, 5);
>>   -     ret = si_insert_input_ptr_as_2xi32(ctx, ret,
>> ctx->param_rw_buffers,
>> -                                          8 + SI_SGPR_RW_BUFFERS);
>> -       ret = si_insert_input_ptr_as_2xi32(ctx, ret,
>> -               ctx->param_bindless_samplers_and_images,
>> -               8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
>> +       ret = si_insert_input_ptr(ctx, ret, ctx->param_rw_buffers,
>> +                                 8 + SI_SGPR_RW_BUFFERS);
>> +       ret = si_insert_input_ptr(ctx, ret,
>> +                                 ctx->param_bindless_samplers_and_images,
>> +                                 8 +
>> SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
>>         ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits,
>>                                   8 + SI_SGPR_VS_STATE_BITS);
>>         ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
>>                                   8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
>>         ret = si_insert_input_ret(ctx, ret,
>> ctx->param_tcs_out_lds_offsets,
>>                                   8 + GFX9_SGPR_TCS_OUT_OFFSETS);
>>         ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_layout,
>>                                   8 + GFX9_SGPR_TCS_OUT_LAYOUT);
>>         ret = si_insert_input_ret(ctx, ret,
>> ctx->param_tcs_offchip_addr_base64k,
>>                                   8 + GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K);
>>         ret = si_insert_input_ret(ctx, ret,
>> ctx->param_tcs_factor_addr_base64k,
>>                                   8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K);
>>   -     unsigned desc_param = ctx->param_tcs_factor_addr_base64k + 2;
>> -       ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
>> -                                          8 +
>> GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS);
>> -       ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
>> -                                          8 +
>> GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES);
>> +       unsigned desc_param = ctx->param_tcs_factor_addr_base64k +
>> +                             (HAVE_32BIT_POINTERS ? 1 : 2);
>> +       ret = si_insert_input_ptr(ctx, ret, desc_param,
>> +                                 8 +
>> GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS);
>> +       ret = si_insert_input_ptr(ctx, ret, desc_param + 1,
>> +                                 8 + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES);
>>         unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
>>         ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
>>                                    ac_to_float(&ctx->ac,
>> ctx->abi.tcs_patch_id),
>>                                    vgpr++, "");
>>         ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
>>                                    ac_to_float(&ctx->ac,
>> ctx->abi.tcs_rel_ids),
>>                                    vgpr++, "");
>>         ctx->return_value = ret;
>>   }
>>     /* Pass GS inputs from ES to GS on GFX9. */
>>   static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
>>   {
>>         LLVMValueRef ret = ctx->return_value;
>>         ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
>>         ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info,
>> 3);
>>         ret = si_insert_input_ret(ctx, ret,
>> ctx->param_merged_scratch_offset, 5);
>>   -     ret = si_insert_input_ptr_as_2xi32(ctx, ret,
>> ctx->param_rw_buffers,
>> -                                          8 + SI_SGPR_RW_BUFFERS);
>> -       ret = si_insert_input_ptr_as_2xi32(ctx, ret,
>> -               ctx->param_bindless_samplers_and_images,
>> -               8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
>> +       ret = si_insert_input_ptr(ctx, ret, ctx->param_rw_buffers,
>> +                                 8 + SI_SGPR_RW_BUFFERS);
>> +       ret = si_insert_input_ptr(ctx, ret,
>> +                                 ctx->param_bindless_samplers_and_images,
>> +                                 8 +
>> SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
>>         unsigned desc_param = ctx->param_vs_state_bits + 1;
>> -       ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
>> -                                          8 +
>> GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS);
>> -       ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
>> -                                          8 +
>> GFX9_SGPR_GS_SAMPLERS_AND_IMAGES);
>> +       ret = si_insert_input_ptr(ctx, ret, desc_param,
>> +                                 8 +
>> GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS);
>> +       ret = si_insert_input_ptr(ctx, ret, desc_param + 1,
>> +                                 8 + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES);
>>         unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR;
>>         for (unsigned i = 0; i < 5; i++) {
>>                 unsigned param = ctx->param_gs_vtx01_offset + i;
>>                 ret = si_insert_input_ret_float(ctx, ret, param, vgpr++);
>>         }
>>         ctx->return_value = ret;
>>   }
>>     static void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi,
>> @@ -4424,46 +4431,46 @@ static void declare_per_stage_desc_pointers(struct
>> si_shader_context *ctx,
>>         LLVMTypeRef const_shader_buf_type;
>>         if (ctx->shader->selector->info.const_buffers_declared == 1 &&
>>             ctx->shader->selector->info.shader_buffers_declared == 0)
>>                 const_shader_buf_type = ctx->f32;
>>         else
>>                 const_shader_buf_type = ctx->v4i32;
>>         unsigned const_and_shader_buffers =
>>                 add_arg(fninfo, ARG_SGPR,
>> -
>> ac_array_in_const_addr_space(const_shader_buf_type));
>> +
>> ac_array_in_const32_addr_space(const_shader_buf_type));
>>         unsigned samplers_and_images =
>>                 add_arg(fninfo, ARG_SGPR,
>> -                       ac_array_in_const_addr_space(ctx->v8i32));
>> +                       ac_array_in_const32_addr_space(ctx->v8i32));
>>         if (assign_params) {
>>                 ctx->param_const_and_shader_buffers =
>> const_and_shader_buffers;
>>                 ctx->param_samplers_and_images = samplers_and_images;
>>         }
>>   }
>>     static void declare_global_desc_pointers(struct si_shader_context
>> *ctx,
>>                                          struct si_function_info *fninfo)
>>   {
>>         ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR,
>> -               ac_array_in_const_addr_space(ctx->v4i32));
>> +               ac_array_in_const32_addr_space(ctx->v4i32));
>>         ctx->param_bindless_samplers_and_images = add_arg(fninfo,
>> ARG_SGPR,
>> -               ac_array_in_const_addr_space(ctx->v8i32));
>> +               ac_array_in_const32_addr_space(ctx->v8i32));
>>   }
>>     static void declare_vs_specific_input_sgprs(struct si_shader_context
>> *ctx,
>>                                             struct si_function_info
>> *fninfo)
>>   {
>>         ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR,
>> -               ac_array_in_const_addr_space(ctx->v4i32));
>> +               ac_array_in_const32_addr_space(ctx->v4i32));
>>         add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex);
>>         add_arg_assign(fninfo, ARG_SGPR, ctx->i32,
>> &ctx->abi.start_instance);
>>         add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id);
>>         ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32);
>>   }
>>     static void declare_vs_input_vgprs(struct si_shader_context *ctx,
>>                                    struct si_function_info *fninfo,
>>                                    unsigned *num_prolog_vgprs)
>>   {
>> @@ -4617,21 +4624,22 @@ static void create_function(struct
>> si_shader_context *ctx)
>>                 declare_global_desc_pointers(ctx, &fninfo);
>>                 declare_per_stage_desc_pointers(ctx, &fninfo,
>>                                                 ctx->type ==
>> PIPE_SHADER_VERTEX);
>>                 declare_vs_specific_input_sgprs(ctx, &fninfo);
>>                 ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR,
>> ctx->i32);
>>                 ctx->param_tcs_out_lds_offsets = add_arg(&fninfo,
>> ARG_SGPR, ctx->i32);
>>                 ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR,
>> ctx->i32);
>>                 ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo,
>> ARG_SGPR, ctx->i32);
>>                 ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo,
>> ARG_SGPR, ctx->i32);
>> -               add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
>> +               if (!HAVE_32BIT_POINTERS)
>> +                       add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
>>                 declare_per_stage_desc_pointers(ctx, &fninfo,
>>                                                 ctx->type ==
>> PIPE_SHADER_TESS_CTRL);
>>                 /* VGPRs (first TCS, then VS) */
>>                 add_arg_assign(&fninfo, ARG_VGPR, ctx->i32,
>> &ctx->abi.tcs_patch_id);
>>                 add_arg_assign(&fninfo, ARG_VGPR, ctx->i32,
>> &ctx->abi.tcs_rel_ids);
>>                 if (ctx->type == PIPE_SHADER_VERTEX) {
>>                         declare_vs_input_vgprs(ctx, &fninfo,
>> @@ -4673,21 +4681,22 @@ static void create_function(struct
>> si_shader_context *ctx)
>>                                                  ctx->type ==
>> PIPE_SHADER_TESS_EVAL));
>>                 if (ctx->type == PIPE_SHADER_VERTEX) {
>>                         declare_vs_specific_input_sgprs(ctx, &fninfo);
>>                 } else {
>>                         /* TESS_EVAL (and also GEOMETRY):
>>                          * Declare as many input SGPRs as the VS has. */
>>                         ctx->param_tcs_offchip_layout = add_arg(&fninfo,
>> ARG_SGPR, ctx->i32);
>>                         ctx->param_tcs_offchip_addr_base64k =
>> add_arg(&fninfo, ARG_SGPR, ctx->i32);
>>                         add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
>>                         add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
>> -                       add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
>> +                       if (!HAVE_32BIT_POINTERS)
>> +                               add_arg(&fninfo, ARG_SGPR, ctx->i32); /*
>> unused */
>>                         ctx->param_vs_state_bits = add_arg(&fninfo,
>> ARG_SGPR, ctx->i32); /* unused */
>>                 }
>>                 declare_per_stage_desc_pointers(ctx, &fninfo,
>>                                                 ctx->type ==
>> PIPE_SHADER_GEOMETRY);
>>                 /* VGPRs (first GS, then VS/TES) */
>>                 ctx->param_gs_vtx01_offset = add_arg(&fninfo, ARG_VGPR,
>> ctx->i32);
>>                 ctx->param_gs_vtx23_offset = add_arg(&fninfo, ARG_VGPR,
>> ctx->i32);
>>                 add_arg_assign(&fninfo, ARG_VGPR, ctx->i32,
>> &ctx->abi.gs_prim_id);
>> @@ -6385,20 +6394,25 @@ static void si_build_wrapper_function(struct
>> si_shader_context *ctx,
>>         num_out = 0;
>>         num_out_sgpr = 0;
>>         for (unsigned i = 0; i < fninfo.num_params; ++i) {
>>                 LLVMValueRef param = LLVMGetParam(ctx->main_fn, i);
>>                 LLVMTypeRef param_type = LLVMTypeOf(param);
>>                 LLVMTypeRef out_type = i < fninfo.num_sgpr_params ?
>> ctx->i32 : ctx->f32;
>>                 unsigned size = ac_get_type_size(param_type) / 4;
>>                 if (size == 1) {
>> +                       if (LLVMGetTypeKind(param_type) ==
>> LLVMPointerTypeKind) {
>> +                               param = LLVMBuildPtrToInt(builder, param,
>> ctx->i32, "");
>> +                               param_type = ctx->i32;
>> +                       }
>> +
>>                         if (param_type != out_type)
>>                                 param = LLVMBuildBitCast(builder, param,
>> out_type, "");
>>                         out[num_out++] = param;
>>                 } else {
>>                         LLVMTypeRef vector_type = LLVMVectorType(out_type,
>> size);
>>                         if (LLVMGetTypeKind(param_type) ==
>> LLVMPointerTypeKind) {
>>                                 param = LLVMBuildPtrToInt(builder, param,
>> ctx->i64, "");
>>                                 param_type = ctx->i64;
>>                         }
>> @@ -6460,22 +6474,28 @@ static void si_build_wrapper_function(struct
>> si_shader_context *ctx,
>>                         assert(out_idx + param_size <= (is_sgpr ?
>> num_out_sgpr : num_out));
>>                         assert(is_sgpr || out_idx >= num_out_sgpr);
>>                         if (param_size == 1)
>>                                 arg = out[out_idx];
>>                         else
>>                                 arg =
>> lp_build_gather_values(&ctx->gallivm, &out[out_idx], param_size);
>>                         if (LLVMTypeOf(arg) != param_type) {
>>                                 if (LLVMGetTypeKind(param_type) ==
>> LLVMPointerTypeKind) {
>> -                                       arg = LLVMBuildBitCast(builder,
>> arg, ctx->i64, "");
>> -                                       arg = LLVMBuildIntToPtr(builder,
>> arg, param_type, "");
>> +                                       if
>> (LLVMGetPointerAddressSpace(param_type) ==
>> +                                           AC_CONST_32BIT_ADDR_SPACE) {
>> +                                               arg =
>> LLVMBuildBitCast(builder, arg, ctx->i32, "");
>> +                                               arg =
>> LLVMBuildIntToPtr(builder, arg, param_type, "");
>> +                                       } else {
>> +                                               arg =
>> LLVMBuildBitCast(builder, arg, ctx->i64, "");
>> +                                               arg =
>> LLVMBuildIntToPtr(builder, arg, param_type, "");
>> +                                       }
>>                                 } else {
>>                                         arg = LLVMBuildBitCast(builder,
>> arg, param_type, "");
>>                                 }
>>                         }
>>                         in[param_idx] = arg;
>>                         out_idx += param_size;
>>                 }
>>                 ret = LLVMBuildCall(builder, parts[part], in, num_params,
>> "");
>> @@ -6934,23 +6954,30 @@ out:
>>     static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context
>> *ctx)
>>   {
>>         LLVMValueRef ptr[2], list;
>>         bool is_merged_shader =
>>                 ctx->screen->info.chip_class >= GFX9 &&
>>                 (ctx->type == PIPE_SHADER_TESS_CTRL ||
>>                  ctx->type == PIPE_SHADER_GEOMETRY ||
>>                  ctx->shader->key.as_ls || ctx->shader->key.as_es);
>>   +     if (HAVE_32BIT_POINTERS) {
>> +               ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8
>> : 0) + SI_SGPR_RW_BUFFERS);
>> +               list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0],
>> +
>> ac_array_in_const32_addr_space(ctx->v4i32), "");
>> +               return list;
>> +       }
>> +
>>         /* Get the pointer to rw buffers. */
>>         ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) +
>> SI_SGPR_RW_BUFFERS);
>> -       ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) +
>> SI_SGPR_RW_BUFFERS_HI);
>> +       ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) +
>> SI_SGPR_RW_BUFFERS + 1);
>
>
> I think SI_SGPR_RW_BUFFERS_HI makes more sense here.

Yeah, but I can't use the _HI definition here because it's not defined
when HAVE_32BIT_POINTERS is enabled, and this code is still compiled in
that case.

Marek
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to