From: Marek Olšák <marek.ol...@amd.com>

---
 src/amd/common/ac_llvm_build.c                    |  6 ++++++
 src/amd/common/ac_llvm_build.h                    |  3 +++
 src/amd/common/ac_nir_to_llvm.c                   | 20 +++++++-------------
 src/gallium/drivers/radeonsi/si_shader.c          | 19 ++++++-------------
 src/gallium/drivers/radeonsi/si_shader_internal.h |  2 --
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c |  6 +++---
 6 files changed, 25 insertions(+), 31 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index a3af204..164f310 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -2026,10 +2026,16 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, params, 2, AC_FUNC_ATTR_READNONE); /* TODO: We need an intrinsic to skip this conditional. */ /* Check for zero: */ return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0, ctx->i32_0, ""), LLVMConstInt(ctx->i32, -1, 0), lsb, ""); } + +LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type) +{ + return LLVMPointerType(LLVMArrayType(elem_type, 0), + AC_CONST_ADDR_SPACE); +} diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 2d6efb5..b1c4737 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -329,15 +329,18 @@ void ac_init_exec_full_mask(struct ac_llvm_context *ctx); void ac_declare_lds_as_pointer(struct ac_llvm_context *ac); LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx, LLVMValueRef dw_addr); void ac_lds_store(struct ac_llvm_context *ctx, LLVMValueRef dw_addr, LLVMValueRef value); LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0); + +LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type); + #ifdef __cplusplus } #endif #endif diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index 0445d27..bc5b140 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -344,26 +344,20 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, LLVMAddTargetDependentFunctionAttr(main_function, "no-nans-fp-math", "true"); LLVMAddTargetDependentFunctionAttr(main_function, "unsafe-fp-math", "true"); } return main_function; } -static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements) -{ - return LLVMPointerType(LLVMArrayType(elem_type, num_elements), - AC_CONST_ADDR_SPACE); -} - static int 
get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type) { if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) type = LLVMGetElementType(type); if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind) return LLVMGetIntTypeWidth(type); if (type == ctx->f16) return 16; @@ -606,58 +600,58 @@ static void allocate_user_sgprs(struct nir_to_llvm_context *ctx, static void declare_global_input_sgprs(struct nir_to_llvm_context *ctx, gl_shader_stage stage, bool has_previous_stage, gl_shader_stage previous_stage, const struct user_sgpr_info *user_sgpr_info, struct arg_info *args, LLVMValueRef *desc_sets) { - LLVMTypeRef type = const_array(ctx->ac.i8, 1024 * 1024); + LLVMTypeRef type = ac_array_in_const_addr_space(ctx->ac.i8); unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0; unsigned stage_mask = 1 << stage; if (has_previous_stage) stage_mask |= 1 << previous_stage; /* 1 for each descriptor set */ if (!user_sgpr_info->indirect_all_descriptor_sets) { for (unsigned i = 0; i < num_sets; ++i) { if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) { add_array_arg(args, type, &ctx->descriptor_sets[i]); } } } else { - add_array_arg(args, const_array(type, 32), desc_sets); + add_array_arg(args, ac_array_in_const_addr_space(type), desc_sets); } if (ctx->shader_info->info.needs_push_constants) { /* 1 for push constants and dynamic descriptors */ add_array_arg(args, type, &ctx->push_constants); } } static void declare_vs_specific_input_sgprs(struct nir_to_llvm_context *ctx, gl_shader_stage stage, bool has_previous_stage, gl_shader_stage previous_stage, struct arg_info *args) { if (!ctx->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { if (ctx->shader_info->info.vs.has_vertex_buffers) { - add_arg(args, ARG_SGPR, const_array(ctx->ac.v4i32, 16), + add_arg(args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32), &ctx->vertex_buffers); } add_arg(args, ARG_SGPR, ctx->ac.i32, 
&ctx->abi.base_vertex); add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.start_instance); if (ctx->shader_info->info.vs.needs_draw_id) { add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.draw_id); } } } @@ -753,21 +747,21 @@ static void create_function(struct nir_to_llvm_context *ctx, gl_shader_stage previous_stage) { uint8_t user_sgpr_idx; struct user_sgpr_info user_sgpr_info; struct arg_info args = {}; LLVMValueRef desc_sets; allocate_user_sgprs(ctx, stage, &user_sgpr_info); if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) { - add_arg(&args, ARG_SGPR, const_array(ctx->ac.v4i32, 16), + add_arg(&args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32), &ctx->ring_offsets); } switch (stage) { case MESA_SHADER_COMPUTE: declare_global_input_sgprs(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets); if (ctx->shader_info->info.cs.uses_grid_size) { @@ -1036,21 +1030,21 @@ static void create_function(struct nir_to_llvm_context *ctx, user_sgpr_idx = 0; if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) { set_loc_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx, 2); if (ctx->options->supports_spill) { ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", LLVMPointerType(ctx->ac.i8, AC_CONST_ADDR_SPACE), NULL, 0, AC_FUNC_ATTR_READNONE); ctx->ring_offsets = LLVMBuildBitCast(ctx->builder, ctx->ring_offsets, - const_array(ctx->ac.v4i32, 16), ""); + ac_array_in_const_addr_space(ctx->ac.v4i32), ""); } } /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including * the rw_buffers at s0/s1. 
With user SGPR0 = s8, lets restart the count from 0 */ if (has_previous_stage) user_sgpr_idx = 0; set_global_input_locs(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, desc_sets, &user_sgpr_idx); @@ -3945,21 +3939,21 @@ static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx, return NULL; } static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx, LLVMValueRef sample_id) { LLVMValueRef result; LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false)); ptr = LLVMBuildBitCast(ctx->builder, ptr, - const_array(ctx->ac.v2f32, 64), ""); + ac_array_in_const_addr_space(ctx->ac.v2f32), ""); sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, ""); result = ac_build_load_invariant(&ctx->ac, ptr, sample_id); return result; } static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx) { LLVMValueRef values[2]; @@ -4458,21 +4452,21 @@ static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi, } assert(stride % type_size == 0); if (!index) index = ctx->ac.i32_0; index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), ""); list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0)); - list = LLVMBuildPointerCast(builder, list, const_array(type, 0), ""); + list = LLVMBuildPointerCast(builder, list, ac_array_in_const_addr_space(type), ""); return ac_build_load_to_sgpr(&ctx->ac, list, index); } static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, const nir_deref_var *deref, enum ac_descriptor_type desc_type, const nir_tex_instr *tex_instr, bool image, bool write) { diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index a1cc6e1..760e742 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -3820,26 +3820,20 @@ static void clock_emit( tmp = lp_build_intrinsic(ctx->ac.builder, 
"llvm.readcyclecounter", ctx->i64, NULL, 0, 0); tmp = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->v2i32, ""); emit_data->output[0] = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->i32_0, ""); emit_data->output[1] = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->i32_1, ""); } -LLVMTypeRef si_const_array(LLVMTypeRef elem_type, int num_elements) -{ - return LLVMPointerType(LLVMArrayType(elem_type, num_elements), - AC_CONST_ADDR_SPACE); -} - static void si_llvm_emit_ddxy( const struct lp_build_tgsi_action *action, struct lp_build_tgsi_context *bld_base, struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); unsigned opcode = emit_data->info->opcode; LLVMValueRef val; int idx; unsigned mask; @@ -4430,47 +4424,46 @@ static void declare_per_stage_desc_pointers(struct si_shader_context *ctx, LLVMTypeRef const_shader_buf_type; if (ctx->shader->selector->info.const_buffers_declared == 1 && ctx->shader->selector->info.shader_buffers_declared == 0) const_shader_buf_type = ctx->f32; else const_shader_buf_type = ctx->v4i32; unsigned const_and_shader_buffers = add_arg(fninfo, ARG_SGPR, - si_const_array(const_shader_buf_type, 0)); + ac_array_in_const_addr_space(const_shader_buf_type)); unsigned samplers_and_images = add_arg(fninfo, ARG_SGPR, - si_const_array(ctx->v8i32, - SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2)); + ac_array_in_const_addr_space(ctx->v8i32)); if (assign_params) { ctx->param_const_and_shader_buffers = const_and_shader_buffers; ctx->param_samplers_and_images = samplers_and_images; } } static void declare_global_desc_pointers(struct si_shader_context *ctx, struct si_function_info *fninfo) { ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR, - si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS)); + ac_array_in_const_addr_space(ctx->v4i32)); ctx->param_bindless_samplers_and_images = add_arg(fninfo, ARG_SGPR, - si_const_array(ctx->v8i32, 0)); + ac_array_in_const_addr_space(ctx->v8i32)); } static void 
declare_vs_specific_input_sgprs(struct si_shader_context *ctx, struct si_function_info *fninfo) { ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR, - si_const_array(ctx->v4i32, SI_NUM_VERTEX_BUFFERS)); + ac_array_in_const_addr_space(ctx->v4i32)); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance); add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id); ctx->param_vs_state_bits = add_arg(fninfo, ARG_SGPR, ctx->i32); } static void declare_vs_input_vgprs(struct si_shader_context *ctx, struct si_function_info *fninfo, unsigned *num_prolog_vgprs) { @@ -6947,21 +6940,21 @@ static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx) (ctx->type == PIPE_SHADER_TESS_CTRL || ctx->type == PIPE_SHADER_GEOMETRY || ctx->shader->key.as_ls || ctx->shader->key.as_es); /* Get the pointer to rw buffers. */ ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS); ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS_HI); list = lp_build_gather_values(&ctx->gallivm, ptr, 2); list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, ""); list = LLVMBuildIntToPtr(ctx->ac.builder, list, - si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS), ""); + ac_array_in_const_addr_space(ctx->v4i32), ""); return list; } /** * Build the vertex shader prolog function. * * The inputs are the same as VS (a lot of SGPRs and 4 VGPR system values). * All inputs are returned unmodified. The vertex load indices are * stored after them, which will be used by the API VS for fetching inputs. 
* diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 6b4acc5..9a8e17b 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -302,22 +302,20 @@ void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base, #define LGKM_CNT 0x07f #define VM_CNT 0xf70 LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx, const struct tgsi_ind_register *ind, unsigned addr_mul, int rel_index); LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx, const struct tgsi_ind_register *ind, int rel_index, unsigned num); -LLVMTypeRef si_const_array(LLVMTypeRef elem_type, int num_elements); - void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base); void si_shader_context_init_mem(struct si_shader_context *ctx); LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, LLVMValueRef list, LLVMValueRef index, enum ac_descriptor_type type); LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, LLVMValueRef list, LLVMValueRef index, enum ac_descriptor_type desc_type, bool dcc_off); diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index fe0cfce..1aa7304 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -133,21 +133,21 @@ LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, { LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef rsrc; if (desc_type == AC_DESC_BUFFER) { index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), ""); index = LLVMBuildAdd(builder, index, ctx->i32_1, ""); list = LLVMBuildPointerCast(builder, list, - si_const_array(ctx->v4i32, 0), ""); + ac_array_in_const_addr_space(ctx->v4i32), ""); } else { assert(desc_type == AC_DESC_IMAGE); } rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index); if (desc_type == AC_DESC_IMAGE && 
dcc_off) rsrc = force_dcc_off(ctx, rsrc); return rsrc; } @@ -1111,33 +1111,33 @@ LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, switch (type) { case AC_DESC_IMAGE: /* The image is at [0:7]. */ index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), ""); break; case AC_DESC_BUFFER: /* The buffer is in [4:7]. */ index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); index = LLVMBuildAdd(builder, index, ctx->i32_1, ""); list = LLVMBuildPointerCast(builder, list, - si_const_array(ctx->v4i32, 0), ""); + ac_array_in_const_addr_space(ctx->v4i32), ""); break; case AC_DESC_FMASK: /* The FMASK is at [8:15]. */ index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), ""); index = LLVMBuildAdd(builder, index, ctx->i32_1, ""); break; case AC_DESC_SAMPLER: /* The sampler state is at [12:15]. */ index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), ""); list = LLVMBuildPointerCast(builder, list, - si_const_array(ctx->v4i32, 0), ""); + ac_array_in_const_addr_space(ctx->v4i32), ""); break; } return ac_build_load_to_sgpr(&ctx->ac, list, index); } /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. * * SI-CI: * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev