Not sure if we'll want to do this, since we'll need to effectively revert it anyway when we implement derivatives with DPP (although we'll have to rename has_ds_bpermute to has_dpp...).
On Wed, Sep 13, 2017 at 1:04 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote: > From: Nicolai Hähnle <nicolai.haeh...@amd.com> > > --- > src/amd/common/ac_llvm_build.c | 3 +-- > src/amd/common/ac_llvm_build.h | 1 - > src/amd/common/ac_nir_to_llvm.c | 5 +---- > src/gallium/drivers/radeonsi/si_pipe.c | 1 - > src/gallium/drivers/radeonsi/si_pipe.h | 1 - > src/gallium/drivers/radeonsi/si_shader.c | 3 +-- > 6 files changed, 3 insertions(+), 11 deletions(-) > > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c > index 4077bd81bbc..6c010e8c3a6 100644 > --- a/src/amd/common/ac_llvm_build.c > +++ b/src/amd/common/ac_llvm_build.c > @@ -965,29 +965,28 @@ ac_get_thread_id(struct ac_llvm_context *ctx) > * So, masking the TID with 0xfffffffc yields the TID of the top left pixel > * of the quad, masking with 0xfffffffd yields the TID of the top pixel of > * the current pixel's column, and masking with 0xfffffffe yields the TID > * of the left pixel of the current pixel's row. > * > * Adding 1 yields the TID of the pixel to the right of the left pixel, and > * adding 2 yields the TID of the pixel below the top pixel. 
> */ > LLVMValueRef > ac_build_ddxy(struct ac_llvm_context *ctx, > - bool has_ds_bpermute, > uint32_t mask, > int idx, > LLVMValueRef val) > { > LLVMValueRef tl, trbl, args[2]; > LLVMValueRef result; > > - if (has_ds_bpermute) { > + if (ctx->chip_class >= VI) { > LLVMValueRef thread_id, tl_tid, trbl_tid; > thread_id = ac_get_thread_id(ctx); > > tl_tid = LLVMBuildAnd(ctx->builder, thread_id, > LLVMConstInt(ctx->i32, mask, false), > ""); > > trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid, > LLVMConstInt(ctx->i32, idx, false), > ""); > > args[0] = LLVMBuildMul(ctx->builder, tl_tid, > diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h > index b6434893cfa..3f93551330c 100644 > --- a/src/amd/common/ac_llvm_build.h > +++ b/src/amd/common/ac_llvm_build.h > @@ -187,21 +187,20 @@ LLVMValueRef ac_build_buffer_load_format(struct > ac_llvm_context *ctx, > > LLVMValueRef > ac_get_thread_id(struct ac_llvm_context *ctx); > > #define AC_TID_MASK_TOP_LEFT 0xfffffffc > #define AC_TID_MASK_TOP 0xfffffffd > #define AC_TID_MASK_LEFT 0xfffffffe > > LLVMValueRef > ac_build_ddxy(struct ac_llvm_context *ctx, > - bool has_ds_bpermute, > uint32_t mask, > int idx, > LLVMValueRef val); > > #define AC_SENDMSG_GS 2 > #define AC_SENDMSG_GS_DONE 3 > > #define AC_SENDMSG_GS_OP_NOP (0 << 4) > #define AC_SENDMSG_GS_OP_CUT (1 << 4) > #define AC_SENDMSG_GS_OP_EMIT (2 << 4) > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c > index c0c4441022a..bf4b3ca6521 100644 > --- a/src/amd/common/ac_nir_to_llvm.c > +++ b/src/amd/common/ac_nir_to_llvm.c > @@ -1407,40 +1407,37 @@ static LLVMValueRef emit_unpack_half_2x16(struct > ac_llvm_context *ctx, > return result; > } > > static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx, > nir_op op, > LLVMValueRef src0) > { > unsigned mask; > int idx; > LLVMValueRef result; > - bool has_ds_bpermute = ctx->abi->chip_class >= VI; > > if (op == nir_op_fddx_fine || op == nir_op_fddx) > mask = AC_TID_MASK_LEFT; > else if 
(op == nir_op_fddy_fine || op == nir_op_fddy) > mask = AC_TID_MASK_TOP; > else > mask = AC_TID_MASK_TOP_LEFT; > > /* for DDX we want to next X pixel, DDY next Y pixel. */ > if (op == nir_op_fddx_fine || > op == nir_op_fddx_coarse || > op == nir_op_fddx) > idx = 1; > else > idx = 2; > > - result = ac_build_ddxy(&ctx->ac, has_ds_bpermute, > - mask, idx, > - src0); > + result = ac_build_ddxy(&ctx->ac, mask, idx, src0); > return result; > } > > /* > * this takes an I,J coordinate pair, > * and works out the X and Y derivatives. > * it returns DDX(I), DDX(J), DDY(I), DDY(J). > */ > static LLVMValueRef emit_ddxy_interp( > struct ac_nir_context *ctx, > diff --git a/src/gallium/drivers/radeonsi/si_pipe.c > b/src/gallium/drivers/radeonsi/si_pipe.c > index ca2e055a90e..bb1362f1cfc 100644 > --- a/src/gallium/drivers/radeonsi/si_pipe.c > +++ b/src/gallium/drivers/radeonsi/si_pipe.c > @@ -1037,21 +1037,20 @@ struct pipe_screen *radeonsi_screen_create(struct > radeon_winsys *ws, > (sscreen->b.chip_class == VI && > sscreen->b.info.pfp_fw_version >= 121 && > sscreen->b.info.me_fw_version >= 87) || > (sscreen->b.chip_class == CIK && > sscreen->b.info.pfp_fw_version >= 211 && > sscreen->b.info.me_fw_version >= 173) || > (sscreen->b.chip_class == SI && > sscreen->b.info.pfp_fw_version >= 79 && > sscreen->b.info.me_fw_version >= 142); > > - sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI; > sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= > CHIP_POLARIS10 && > sscreen->b.family <= > CHIP_POLARIS12) || > sscreen->b.family == CHIP_VEGA10 || > sscreen->b.family == CHIP_RAVEN; > sscreen->dpbb_allowed = sscreen->b.chip_class >= GFX9 && > !(sscreen->b.debug_flags & DBG_NO_DPBB); > sscreen->dfsm_allowed = sscreen->dpbb_allowed && > !(sscreen->b.debug_flags & DBG_NO_DFSM); > > /* While it would be nice not to have this flag, we are constrained > diff --git a/src/gallium/drivers/radeonsi/si_pipe.h > b/src/gallium/drivers/radeonsi/si_pipe.h > index 8db7028c9a1..10215a35886 100644 
> --- a/src/gallium/drivers/radeonsi/si_pipe.h > +++ b/src/gallium/drivers/radeonsi/si_pipe.h > @@ -87,21 +87,20 @@ struct si_compute; > struct hash_table; > struct u_suballocator; > > struct si_screen { > struct r600_common_screen b; > unsigned gs_table_depth; > unsigned tess_offchip_block_dw_size; > bool has_clear_state; > bool has_distributed_tess; > bool has_draw_indirect_multi; > - bool has_ds_bpermute; > bool has_msaa_sample_loc_bug; > bool dpbb_allowed; > bool dfsm_allowed; > bool llvm_has_working_vgpr_indexing; > > /* Whether shaders are monolithic (1-part) or separate (3-part). */ > bool use_monolithic_shaders; > bool record_llvm_ir; > > mtx_t shader_parts_mutex; > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index c4e7f225a8f..aea199d3efd 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -3646,22 +3646,21 @@ static void si_llvm_emit_ddxy( > mask = AC_TID_MASK_LEFT; > else if (opcode == TGSI_OPCODE_DDY_FINE) > mask = AC_TID_MASK_TOP; > else > mask = AC_TID_MASK_TOP_LEFT; > > /* for DDX we want to next X pixel, DDY next Y pixel. */ > idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? > 1 : 2; > > val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], > ctx->i32, ""); > - val = ac_build_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute, > - mask, idx, val); > + val = ac_build_ddxy(&ctx->ac, mask, idx, val); > emit_data->output[emit_data->chan] = val; > } > > /* > * this takes an I,J coordinate pair, > * and works out the X and Y derivatives. > * it returns DDX(I), DDX(J), DDY(I), DDY(J). 
> */ > static LLVMValueRef si_llvm_emit_ddxy_interp( > struct lp_build_tgsi_context *bld_base, > -- > 2.11.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev