On Mon, Jan 8, 2018 at 10:31 PM, Samuel Pitoiset <samuel.pitoi...@gmail.com> wrote: > > > On 01/06/2018 12:12 PM, Marek Olšák wrote: >> >> From: Marek Olšák <marek.ol...@amd.com> >> >> SGPRS: 2170102 -> 2158430 (-0.54 %) >> VGPRS: 1645656 -> 1641516 (-0.25 %) >> Spilled SGPRs: 9078 -> 8810 (-2.95 %) >> Spilled VGPRs: 130 -> 114 (-12.31 %) >> Scratch size: 1508 -> 1492 (-1.06 %) dwords per thread >> Code Size: 52094872 -> 52692540 (1.15 %) bytes > > > These numbers are quite nice, great work! I think it's something I would > like to implement for RADV. > > Just one minor nitpick below. > > Reviewed-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > > >> --- >> src/amd/common/ac_llvm_build.c | 13 +++ >> src/amd/common/ac_llvm_build.h | 5 + >> src/gallium/drivers/radeonsi/si_descriptors.c | 10 +- >> src/gallium/drivers/radeonsi/si_shader.c | 115 >> +++++++++++++--------- >> src/gallium/drivers/radeonsi/si_shader.h | 23 ++++- >> src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 6 +- >> 6 files changed, 122 insertions(+), 50 deletions(-) >> >> diff --git a/src/amd/common/ac_llvm_build.c >> b/src/amd/common/ac_llvm_build.c >> index ed00d20..02d1b39 100644 >> --- a/src/amd/common/ac_llvm_build.c >> +++ b/src/amd/common/ac_llvm_build.c >> @@ -57,20 +57,21 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, >> LLVMContextRef context, >> ctx->context = context; >> ctx->module = NULL; >> ctx->builder = NULL; >> ctx->voidt = LLVMVoidTypeInContext(ctx->context); >> ctx->i1 = LLVMInt1TypeInContext(ctx->context); >> ctx->i8 = LLVMInt8TypeInContext(ctx->context); >> ctx->i16 = LLVMIntTypeInContext(ctx->context, 16); >> ctx->i32 = LLVMIntTypeInContext(ctx->context, 32); >> ctx->i64 = LLVMIntTypeInContext(ctx->context, 64); >> + ctx->intptr = HAVE_32BIT_POINTERS ? 
ctx->i32 : ctx->i64; >> ctx->f16 = LLVMHalfTypeInContext(ctx->context); >> ctx->f32 = LLVMFloatTypeInContext(ctx->context); >> ctx->f64 = LLVMDoubleTypeInContext(ctx->context); >> ctx->v2i16 = LLVMVectorType(ctx->i16, 2); >> ctx->v2i32 = LLVMVectorType(ctx->i32, 2); >> ctx->v3i32 = LLVMVectorType(ctx->i32, 3); >> ctx->v4i32 = LLVMVectorType(ctx->i32, 4); >> ctx->v2f32 = LLVMVectorType(ctx->f32, 2); >> ctx->v4f32 = LLVMVectorType(ctx->f32, 4); >> ctx->v8i32 = LLVMVectorType(ctx->i32, 8); >> @@ -128,21 +129,24 @@ unsigned >> ac_get_type_size(LLVMTypeRef type) >> { >> LLVMTypeKind kind = LLVMGetTypeKind(type); >> switch (kind) { >> case LLVMIntegerTypeKind: >> return LLVMGetIntTypeWidth(type) / 8; >> case LLVMFloatTypeKind: >> return 4; >> case LLVMDoubleTypeKind: >> + return 8; >> case LLVMPointerTypeKind: >> + if (LLVMGetPointerAddressSpace(type) == >> AC_CONST_32BIT_ADDR_SPACE) >> + return 4; >> return 8; >> case LLVMVectorTypeKind: >> return LLVMGetVectorSize(type) * >> ac_get_type_size(LLVMGetElementType(type)); >> case LLVMArrayTypeKind: >> return LLVMGetArrayLength(type) * >> ac_get_type_size(LLVMGetElementType(type)); >> default: >> assert(0); >> return 0; >> @@ -2035,10 +2039,19 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context >> *ctx, >> LLVMIntEQ, >> src0, >> ctx->i32_0, >> ""), >> LLVMConstInt(ctx->i32, -1, 0), lsb, ""); >> } >> LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type) >> { >> return LLVMPointerType(LLVMArrayType(elem_type, 0), >> AC_CONST_ADDR_SPACE); >> } >> + >> +LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type) >> +{ >> + if (!HAVE_32BIT_POINTERS) >> + return ac_array_in_const_addr_space(elem_type); >> + >> + return LLVMPointerType(LLVMArrayType(elem_type, 0), >> + AC_CONST_32BIT_ADDR_SPACE); >> +} >> diff --git a/src/amd/common/ac_llvm_build.h >> b/src/amd/common/ac_llvm_build.h >> index b1c4737..5235664 100644 >> --- a/src/amd/common/ac_llvm_build.h >> +++ b/src/amd/common/ac_llvm_build.h >> @@ -27,36 +27,40 
@@ >> #include <stdbool.h> >> #include <llvm-c/TargetMachine.h> >> #include "amd_family.h" >> #ifdef __cplusplus >> extern "C" { >> #endif >> +#define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0600) >> + >> enum { >> AC_CONST_ADDR_SPACE = 2, /* CONST is the only address space that >> selects SMEM loads */ >> AC_LOCAL_ADDR_SPACE = 3, >> + AC_CONST_32BIT_ADDR_SPACE = 6, /* same as CONST, but the pointer >> type has 32 bits */ >> }; >> struct ac_llvm_context { >> LLVMContextRef context; >> LLVMModuleRef module; >> LLVMBuilderRef builder; >> LLVMTypeRef voidt; >> LLVMTypeRef i1; >> LLVMTypeRef i8; >> LLVMTypeRef i16; >> LLVMTypeRef i32; >> LLVMTypeRef i64; >> + LLVMTypeRef intptr; >> LLVMTypeRef f16; >> LLVMTypeRef f32; >> LLVMTypeRef f64; >> LLVMTypeRef v2i16; >> LLVMTypeRef v2i32; >> LLVMTypeRef v3i32; >> LLVMTypeRef v4i32; >> LLVMTypeRef v2f32; >> LLVMTypeRef v4f32; >> LLVMTypeRef v8i32; >> @@ -331,16 +335,17 @@ void ac_declare_lds_as_pointer(struct >> ac_llvm_context *ac); >> LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx, >> LLVMValueRef dw_addr); >> void ac_lds_store(struct ac_llvm_context *ctx, >> LLVMValueRef dw_addr, LLVMValueRef value); >> LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, >> LLVMTypeRef dst_type, >> LLVMValueRef src0); >> LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type); >> +LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type); >> #ifdef __cplusplus >> } >> #endif >> #endif >> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c >> b/src/gallium/drivers/radeonsi/si_descriptors.c >> index b372090..810169d 100644 >> --- a/src/gallium/drivers/radeonsi/si_descriptors.c >> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c >> @@ -1996,31 +1996,35 @@ void si_shader_change_notify(struct si_context >> *sctx) >> } else { >> si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0); >> } >> } >> static void si_emit_shader_pointer_head(struct radeon_winsys_cs *cs, >> struct si_descriptors *desc, >> unsigned sh_base, >> 
unsigned pointer_count) >> { >> - radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * 2, 0)); >> + radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * >> (HAVE_32BIT_POINTERS ? 1 : 2), 0)); >> radeon_emit(cs, (sh_base + desc->shader_userdata_offset - >> SI_SH_REG_OFFSET) >> 2); >> } >> static void si_emit_shader_pointer_body(struct radeon_winsys_cs *cs, >> struct si_descriptors *desc) >> { >> uint64_t va = desc->gpu_address; >> radeon_emit(cs, va); >> - radeon_emit(cs, va >> 32); >> + >> + if (HAVE_32BIT_POINTERS) >> + assert(va <= 0xffffffff); >> + else >> + radeon_emit(cs, va >> 32); >> } >> static void si_emit_shader_pointer(struct si_context *sctx, >> struct si_descriptors *desc, >> unsigned sh_base) >> { >> struct radeon_winsys_cs *cs = sctx->b.gfx.cs; >> si_emit_shader_pointer_head(cs, desc, sh_base, 1); >> si_emit_shader_pointer_body(cs, desc); >> @@ -2553,22 +2557,24 @@ void si_all_resident_buffers_begin_new_cs(struct >> si_context *sctx) >> sctx->b.num_resident_handles += num_resident_tex_handles + >> num_resident_img_handles; >> } >> /* INIT/DEINIT/UPLOAD */ >> void si_init_all_descriptors(struct si_context *sctx) >> { >> int i; >> +#if !HAVE_32BIT_POINTERS >> STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0); >> STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0); >> +#endif >> for (i = 0; i < SI_NUM_SHADERS; i++) { >> bool gfx9_tcs = false; >> bool gfx9_gs = false; >> unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + >> SI_NUM_SAMPLERS; >> unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + >> SI_NUM_CONST_BUFFERS; >> struct si_descriptors *desc; >> if (sctx->b.chip_class >= GFX9) { >> gfx9_tcs = i == PIPE_SHADER_TESS_CTRL; >> diff --git a/src/gallium/drivers/radeonsi/si_shader.c >> b/src/gallium/drivers/radeonsi/si_shader.c >> index 760e742..57336ff 100644 >> --- a/src/gallium/drivers/radeonsi/si_shader.c >> +++ b/src/gallium/drivers/radeonsi/si_shader.c >> @@ -3144,26 +3144,32 @@ si_insert_input_ret_float(struct 
si_shader_context >> *ctx, LLVMValueRef ret, >> { >> LLVMBuilderRef builder = ctx->ac.builder; >> LLVMValueRef p = LLVMGetParam(ctx->main_fn, param); >> return LLVMBuildInsertValue(builder, ret, >> ac_to_float(&ctx->ac, p), >> return_index, ""); >> } >> static LLVMValueRef >> -si_insert_input_ptr_as_2xi32(struct si_shader_context *ctx, LLVMValueRef >> ret, >> - unsigned param, unsigned return_index) >> +si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret, >> + unsigned param, unsigned return_index) >> { >> LLVMBuilderRef builder = ctx->ac.builder; >> LLVMValueRef ptr, lo, hi; >> + if (HAVE_32BIT_POINTERS) { >> + ptr = LLVMGetParam(ctx->main_fn, param); >> + ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, ""); >> + return LLVMBuildInsertValue(builder, ret, ptr, >> return_index, ""); >> + } >> + >> ptr = LLVMGetParam(ctx->main_fn, param); >> ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i64, ""); >> ptr = LLVMBuildBitCast(builder, ptr, ctx->v2i32, ""); >> lo = LLVMBuildExtractElement(builder, ptr, ctx->i32_0, ""); >> hi = LLVMBuildExtractElement(builder, ptr, ctx->i32_1, ""); >> ret = LLVMBuildInsertValue(builder, ret, lo, return_index, ""); >> return LLVMBuildInsertValue(builder, ret, hi, return_index + 1, >> ""); >> } >> /* This only writes the tessellation factor levels. */ >> @@ -3265,75 +3271,76 @@ static void si_llvm_emit_tcs_epilogue(struct >> ac_shader_abi *abi, >> /* Pass TCS inputs from LS to TCS on GFX9. 
*/ >> static void si_set_ls_return_value_for_tcs(struct si_shader_context >> *ctx) >> { >> LLVMValueRef ret = ctx->return_value; >> ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, >> 2); >> ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, >> 3); >> ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, >> 4); >> ret = si_insert_input_ret(ctx, ret, >> ctx->param_merged_scratch_offset, 5); >> - ret = si_insert_input_ptr_as_2xi32(ctx, ret, >> ctx->param_rw_buffers, >> - 8 + SI_SGPR_RW_BUFFERS); >> - ret = si_insert_input_ptr_as_2xi32(ctx, ret, >> - ctx->param_bindless_samplers_and_images, >> - 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); >> + ret = si_insert_input_ptr(ctx, ret, ctx->param_rw_buffers, >> + 8 + SI_SGPR_RW_BUFFERS); >> + ret = si_insert_input_ptr(ctx, ret, >> + ctx->param_bindless_samplers_and_images, >> + 8 + >> SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); >> ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits, >> 8 + SI_SGPR_VS_STATE_BITS); >> ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout, >> 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT); >> ret = si_insert_input_ret(ctx, ret, >> ctx->param_tcs_out_lds_offsets, >> 8 + GFX9_SGPR_TCS_OUT_OFFSETS); >> ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_layout, >> 8 + GFX9_SGPR_TCS_OUT_LAYOUT); >> ret = si_insert_input_ret(ctx, ret, >> ctx->param_tcs_offchip_addr_base64k, >> 8 + GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K); >> ret = si_insert_input_ret(ctx, ret, >> ctx->param_tcs_factor_addr_base64k, >> 8 + GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K); >> - unsigned desc_param = ctx->param_tcs_factor_addr_base64k + 2; >> - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param, >> - 8 + >> GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS); >> - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1, >> - 8 + >> GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES); >> + unsigned desc_param = ctx->param_tcs_factor_addr_base64k + >> + (HAVE_32BIT_POINTERS ? 
1 : 2); >> + ret = si_insert_input_ptr(ctx, ret, desc_param, >> + 8 + >> GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS); >> + ret = si_insert_input_ptr(ctx, ret, desc_param + 1, >> + 8 + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES); >> unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR; >> ret = LLVMBuildInsertValue(ctx->ac.builder, ret, >> ac_to_float(&ctx->ac, >> ctx->abi.tcs_patch_id), >> vgpr++, ""); >> ret = LLVMBuildInsertValue(ctx->ac.builder, ret, >> ac_to_float(&ctx->ac, >> ctx->abi.tcs_rel_ids), >> vgpr++, ""); >> ctx->return_value = ret; >> } >> /* Pass GS inputs from ES to GS on GFX9. */ >> static void si_set_es_return_value_for_gs(struct si_shader_context *ctx) >> { >> LLVMValueRef ret = ctx->return_value; >> ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2); >> ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, >> 3); >> ret = si_insert_input_ret(ctx, ret, >> ctx->param_merged_scratch_offset, 5); >> - ret = si_insert_input_ptr_as_2xi32(ctx, ret, >> ctx->param_rw_buffers, >> - 8 + SI_SGPR_RW_BUFFERS); >> - ret = si_insert_input_ptr_as_2xi32(ctx, ret, >> - ctx->param_bindless_samplers_and_images, >> - 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); >> + ret = si_insert_input_ptr(ctx, ret, ctx->param_rw_buffers, >> + 8 + SI_SGPR_RW_BUFFERS); >> + ret = si_insert_input_ptr(ctx, ret, >> + ctx->param_bindless_samplers_and_images, >> + 8 + >> SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); >> unsigned desc_param = ctx->param_vs_state_bits + 1; >> - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param, >> - 8 + >> GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS); >> - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1, >> - 8 + >> GFX9_SGPR_GS_SAMPLERS_AND_IMAGES); >> + ret = si_insert_input_ptr(ctx, ret, desc_param, >> + 8 + >> GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS); >> + ret = si_insert_input_ptr(ctx, ret, desc_param + 1, >> + 8 + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES); >> unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR; >> for (unsigned i = 0; i < 5; i++) { >> unsigned param 
= ctx->param_gs_vtx01_offset + i; >> ret = si_insert_input_ret_float(ctx, ret, param, vgpr++); >> } >> ctx->return_value = ret; >> } >> static void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi, >> @@ -4424,46 +4431,46 @@ static void declare_per_stage_desc_pointers(struct >> si_shader_context *ctx, >> LLVMTypeRef const_shader_buf_type; >> if (ctx->shader->selector->info.const_buffers_declared == 1 && >> ctx->shader->selector->info.shader_buffers_declared == 0) >> const_shader_buf_type = ctx->f32; >> else >> const_shader_buf_type = ctx->v4i32; >> unsigned const_and_shader_buffers = >> add_arg(fninfo, ARG_SGPR, >> - >> ac_array_in_const_addr_space(const_shader_buf_type)); >> + >> ac_array_in_const32_addr_space(const_shader_buf_type)); >> unsigned samplers_and_images = >> add_arg(fninfo, ARG_SGPR, >> - ac_array_in_const_addr_space(ctx->v8i32)); >> + ac_array_in_const32_addr_space(ctx->v8i32)); >> if (assign_params) { >> ctx->param_const_and_shader_buffers = >> const_and_shader_buffers; >> ctx->param_samplers_and_images = samplers_and_images; >> } >> } >> static void declare_global_desc_pointers(struct si_shader_context >> *ctx, >> struct si_function_info *fninfo) >> { >> ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR, >> - ac_array_in_const_addr_space(ctx->v4i32)); >> + ac_array_in_const32_addr_space(ctx->v4i32)); >> ctx->param_bindless_samplers_and_images = add_arg(fninfo, >> ARG_SGPR, >> - ac_array_in_const_addr_space(ctx->v8i32)); >> + ac_array_in_const32_addr_space(ctx->v8i32)); >> } >> static void declare_vs_specific_input_sgprs(struct si_shader_context >> *ctx, >> struct si_function_info >> *fninfo) >> { >> ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR, >> - ac_array_in_const_addr_space(ctx->v4i32)); >> + ac_array_in_const32_addr_space(ctx->v4i32)); >> add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex); >> add_arg_assign(fninfo, ARG_SGPR, ctx->i32, >> &ctx->abi.start_instance); >> add_arg_assign(fninfo, ARG_SGPR, ctx->i32, 
&ctx->abi.draw_id); >> ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32); >> } >> static void declare_vs_input_vgprs(struct si_shader_context *ctx, >> struct si_function_info *fninfo, >> unsigned *num_prolog_vgprs) >> { >> @@ -4617,21 +4624,22 @@ static void create_function(struct >> si_shader_context *ctx) >> declare_global_desc_pointers(ctx, &fninfo); >> declare_per_stage_desc_pointers(ctx, &fninfo, >> ctx->type == >> PIPE_SHADER_VERTEX); >> declare_vs_specific_input_sgprs(ctx, &fninfo); >> ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, >> ctx->i32); >> ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, >> ARG_SGPR, ctx->i32); >> ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, >> ctx->i32); >> ctx->param_tcs_offchip_addr_base64k = add_arg(&fninfo, >> ARG_SGPR, ctx->i32); >> ctx->param_tcs_factor_addr_base64k = add_arg(&fninfo, >> ARG_SGPR, ctx->i32); >> - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ >> + if (!HAVE_32BIT_POINTERS) >> + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ >> declare_per_stage_desc_pointers(ctx, &fninfo, >> ctx->type == >> PIPE_SHADER_TESS_CTRL); >> /* VGPRs (first TCS, then VS) */ >> add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, >> &ctx->abi.tcs_patch_id); >> add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, >> &ctx->abi.tcs_rel_ids); >> if (ctx->type == PIPE_SHADER_VERTEX) { >> declare_vs_input_vgprs(ctx, &fninfo, >> @@ -4673,21 +4681,22 @@ static void create_function(struct >> si_shader_context *ctx) >> ctx->type == >> PIPE_SHADER_TESS_EVAL)); >> if (ctx->type == PIPE_SHADER_VERTEX) { >> declare_vs_specific_input_sgprs(ctx, &fninfo); >> } else { >> /* TESS_EVAL (and also GEOMETRY): >> * Declare as many input SGPRs as the VS has. 
*/ >> ctx->param_tcs_offchip_layout = add_arg(&fninfo, >> ARG_SGPR, ctx->i32); >> ctx->param_tcs_offchip_addr_base64k = >> add_arg(&fninfo, ARG_SGPR, ctx->i32); >> add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ >> add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ >> - add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */ >> + if (!HAVE_32BIT_POINTERS) >> + add_arg(&fninfo, ARG_SGPR, ctx->i32); /* >> unused */ >> ctx->param_vs_state_bits = add_arg(&fninfo, >> ARG_SGPR, ctx->i32); /* unused */ >> } >> declare_per_stage_desc_pointers(ctx, &fninfo, >> ctx->type == >> PIPE_SHADER_GEOMETRY); >> /* VGPRs (first GS, then VS/TES) */ >> ctx->param_gs_vtx01_offset = add_arg(&fninfo, ARG_VGPR, >> ctx->i32); >> ctx->param_gs_vtx23_offset = add_arg(&fninfo, ARG_VGPR, >> ctx->i32); >> add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, >> &ctx->abi.gs_prim_id); >> @@ -6385,20 +6394,25 @@ static void si_build_wrapper_function(struct >> si_shader_context *ctx, >> num_out = 0; >> num_out_sgpr = 0; >> for (unsigned i = 0; i < fninfo.num_params; ++i) { >> LLVMValueRef param = LLVMGetParam(ctx->main_fn, i); >> LLVMTypeRef param_type = LLVMTypeOf(param); >> LLVMTypeRef out_type = i < fninfo.num_sgpr_params ? >> ctx->i32 : ctx->f32; >> unsigned size = ac_get_type_size(param_type) / 4; >> if (size == 1) { >> + if (LLVMGetTypeKind(param_type) == >> LLVMPointerTypeKind) { >> + param = LLVMBuildPtrToInt(builder, param, >> ctx->i32, ""); >> + param_type = ctx->i32; >> + } >> + >> if (param_type != out_type) >> param = LLVMBuildBitCast(builder, param, >> out_type, ""); >> out[num_out++] = param; >> } else { >> LLVMTypeRef vector_type = LLVMVectorType(out_type, >> size); >> if (LLVMGetTypeKind(param_type) == >> LLVMPointerTypeKind) { >> param = LLVMBuildPtrToInt(builder, param, >> ctx->i64, ""); >> param_type = ctx->i64; >> } >> @@ -6460,22 +6474,28 @@ static void si_build_wrapper_function(struct >> si_shader_context *ctx, >> assert(out_idx + param_size <= (is_sgpr ? 
>> num_out_sgpr : num_out)); >> assert(is_sgpr || out_idx >= num_out_sgpr); >> if (param_size == 1) >> arg = out[out_idx]; >> else >> arg = >> lp_build_gather_values(&ctx->gallivm, &out[out_idx], param_size); >> if (LLVMTypeOf(arg) != param_type) { >> if (LLVMGetTypeKind(param_type) == >> LLVMPointerTypeKind) { >> - arg = LLVMBuildBitCast(builder, >> arg, ctx->i64, ""); >> - arg = LLVMBuildIntToPtr(builder, >> arg, param_type, ""); >> + if >> (LLVMGetPointerAddressSpace(param_type) == >> + AC_CONST_32BIT_ADDR_SPACE) { >> + arg = >> LLVMBuildBitCast(builder, arg, ctx->i32, ""); >> + arg = >> LLVMBuildIntToPtr(builder, arg, param_type, ""); >> + } else { >> + arg = >> LLVMBuildBitCast(builder, arg, ctx->i64, ""); >> + arg = >> LLVMBuildIntToPtr(builder, arg, param_type, ""); >> + } >> } else { >> arg = LLVMBuildBitCast(builder, >> arg, param_type, ""); >> } >> } >> in[param_idx] = arg; >> out_idx += param_size; >> } >> ret = LLVMBuildCall(builder, parts[part], in, num_params, >> ""); >> @@ -6934,23 +6954,30 @@ out: >> static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context >> *ctx) >> { >> LLVMValueRef ptr[2], list; >> bool is_merged_shader = >> ctx->screen->info.chip_class >= GFX9 && >> (ctx->type == PIPE_SHADER_TESS_CTRL || >> ctx->type == PIPE_SHADER_GEOMETRY || >> ctx->shader->key.as_ls || ctx->shader->key.as_es); >> + if (HAVE_32BIT_POINTERS) { >> + ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 >> : 0) + SI_SGPR_RW_BUFFERS); >> + list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0], >> + >> ac_array_in_const32_addr_space(ctx->v4i32), ""); >> + return list; >> + } >> + >> /* Get the pointer to rw buffers. */ >> ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + >> SI_SGPR_RW_BUFFERS); >> - ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + >> SI_SGPR_RW_BUFFERS_HI); >> + ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 
8 : 0) + >> SI_SGPR_RW_BUFFERS + 1); > > > I think SI_SGPR_RW_BUFFERS_HI makes more sense here.
Yeah, but I can't use the _HI definition here because it's not defined if HAVE_32BIT_POINTERS is true. Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev