Not sure if we'll want to do this, since we'll need to
effectively revert it anyway when we implement derivatives with DPP
(although we'll have to rename has_ds_bpermute to has_dpp...).

On Wed, Sep 13, 2017 at 1:04 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haeh...@amd.com>
>
> ---
>  src/amd/common/ac_llvm_build.c           | 3 +--
>  src/amd/common/ac_llvm_build.h           | 1 -
>  src/amd/common/ac_nir_to_llvm.c          | 5 +----
>  src/gallium/drivers/radeonsi/si_pipe.c   | 1 -
>  src/gallium/drivers/radeonsi/si_pipe.h   | 1 -
>  src/gallium/drivers/radeonsi/si_shader.c | 3 +--
>  6 files changed, 3 insertions(+), 11 deletions(-)
>
> diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> index 4077bd81bbc..6c010e8c3a6 100644
> --- a/src/amd/common/ac_llvm_build.c
> +++ b/src/amd/common/ac_llvm_build.c
> @@ -965,29 +965,28 @@ ac_get_thread_id(struct ac_llvm_context *ctx)
>   * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
>   * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
>   * the current pixel's column, and masking with 0xfffffffe yields the TID
>   * of the left pixel of the current pixel's row.
>   *
>   * Adding 1 yields the TID of the pixel to the right of the left pixel, and
>   * adding 2 yields the TID of the pixel below the top pixel.
>   */
>  LLVMValueRef
>  ac_build_ddxy(struct ac_llvm_context *ctx,
> -             bool has_ds_bpermute,
>               uint32_t mask,
>               int idx,
>               LLVMValueRef val)
>  {
>         LLVMValueRef tl, trbl, args[2];
>         LLVMValueRef result;
>
> -       if (has_ds_bpermute) {
> +       if (ctx->chip_class >= VI) {
>                 LLVMValueRef thread_id, tl_tid, trbl_tid;
>                 thread_id = ac_get_thread_id(ctx);
>
>                 tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
>                                       LLVMConstInt(ctx->i32, mask, false), 
> "");
>
>                 trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
>                                         LLVMConstInt(ctx->i32, idx, false), 
> "");
>
>                 args[0] = LLVMBuildMul(ctx->builder, tl_tid,
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index b6434893cfa..3f93551330c 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -187,21 +187,20 @@ LLVMValueRef ac_build_buffer_load_format(struct 
> ac_llvm_context *ctx,
>
>  LLVMValueRef
>  ac_get_thread_id(struct ac_llvm_context *ctx);
>
>  #define AC_TID_MASK_TOP_LEFT 0xfffffffc
>  #define AC_TID_MASK_TOP      0xfffffffd
>  #define AC_TID_MASK_LEFT     0xfffffffe
>
>  LLVMValueRef
>  ac_build_ddxy(struct ac_llvm_context *ctx,
> -             bool has_ds_bpermute,
>               uint32_t mask,
>               int idx,
>               LLVMValueRef val);
>
>  #define AC_SENDMSG_GS 2
>  #define AC_SENDMSG_GS_DONE 3
>
>  #define AC_SENDMSG_GS_OP_NOP      (0 << 4)
>  #define AC_SENDMSG_GS_OP_CUT      (1 << 4)
>  #define AC_SENDMSG_GS_OP_EMIT     (2 << 4)
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index c0c4441022a..bf4b3ca6521 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -1407,40 +1407,37 @@ static LLVMValueRef emit_unpack_half_2x16(struct 
> ac_llvm_context *ctx,
>         return result;
>  }
>
>  static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
>                               nir_op op,
>                               LLVMValueRef src0)
>  {
>         unsigned mask;
>         int idx;
>         LLVMValueRef result;
> -       bool has_ds_bpermute = ctx->abi->chip_class >= VI;
>
>         if (op == nir_op_fddx_fine || op == nir_op_fddx)
>                 mask = AC_TID_MASK_LEFT;
>         else if (op == nir_op_fddy_fine || op == nir_op_fddy)
>                 mask = AC_TID_MASK_TOP;
>         else
>                 mask = AC_TID_MASK_TOP_LEFT;
>
>         /* for DDX we want to next X pixel, DDY next Y pixel. */
>         if (op == nir_op_fddx_fine ||
>             op == nir_op_fddx_coarse ||
>             op == nir_op_fddx)
>                 idx = 1;
>         else
>                 idx = 2;
>
> -       result = ac_build_ddxy(&ctx->ac, has_ds_bpermute,
> -                             mask, idx,
> -                             src0);
> +       result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
>         return result;
>  }
>
>  /*
>   * this takes an I,J coordinate pair,
>   * and works out the X and Y derivatives.
>   * it returns DDX(I), DDX(J), DDY(I), DDY(J).
>   */
>  static LLVMValueRef emit_ddxy_interp(
>         struct ac_nir_context *ctx,
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
> b/src/gallium/drivers/radeonsi/si_pipe.c
> index ca2e055a90e..bb1362f1cfc 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -1037,21 +1037,20 @@ struct pipe_screen *radeonsi_screen_create(struct 
> radeon_winsys *ws,
>                 (sscreen->b.chip_class == VI &&
>                  sscreen->b.info.pfp_fw_version >= 121 &&
>                  sscreen->b.info.me_fw_version >= 87) ||
>                 (sscreen->b.chip_class == CIK &&
>                  sscreen->b.info.pfp_fw_version >= 211 &&
>                  sscreen->b.info.me_fw_version >= 173) ||
>                 (sscreen->b.chip_class == SI &&
>                  sscreen->b.info.pfp_fw_version >= 79 &&
>                  sscreen->b.info.me_fw_version >= 142);
>
> -       sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI;
>         sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= 
> CHIP_POLARIS10 &&
>                                             sscreen->b.family <= 
> CHIP_POLARIS12) ||
>                                            sscreen->b.family == CHIP_VEGA10 ||
>                                            sscreen->b.family == CHIP_RAVEN;
>         sscreen->dpbb_allowed = sscreen->b.chip_class >= GFX9 &&
>                                 !(sscreen->b.debug_flags & DBG_NO_DPBB);
>         sscreen->dfsm_allowed = sscreen->dpbb_allowed &&
>                                 !(sscreen->b.debug_flags & DBG_NO_DFSM);
>
>         /* While it would be nice not to have this flag, we are constrained
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
> b/src/gallium/drivers/radeonsi/si_pipe.h
> index 8db7028c9a1..10215a35886 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -87,21 +87,20 @@ struct si_compute;
>  struct hash_table;
>  struct u_suballocator;
>
>  struct si_screen {
>         struct r600_common_screen       b;
>         unsigned                        gs_table_depth;
>         unsigned                        tess_offchip_block_dw_size;
>         bool                            has_clear_state;
>         bool                            has_distributed_tess;
>         bool                            has_draw_indirect_multi;
> -       bool                            has_ds_bpermute;
>         bool                            has_msaa_sample_loc_bug;
>         bool                            dpbb_allowed;
>         bool                            dfsm_allowed;
>         bool                            llvm_has_working_vgpr_indexing;
>
>         /* Whether shaders are monolithic (1-part) or separate (3-part). */
>         bool                            use_monolithic_shaders;
>         bool                            record_llvm_ir;
>
>         mtx_t                   shader_parts_mutex;
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index c4e7f225a8f..aea199d3efd 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -3646,22 +3646,21 @@ static void si_llvm_emit_ddxy(
>                 mask = AC_TID_MASK_LEFT;
>         else if (opcode == TGSI_OPCODE_DDY_FINE)
>                 mask = AC_TID_MASK_TOP;
>         else
>                 mask = AC_TID_MASK_TOP_LEFT;
>
>         /* for DDX we want to next X pixel, DDY next Y pixel. */
>         idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 
> 1 : 2;
>
>         val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], 
> ctx->i32, "");
> -       val = ac_build_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute,
> -                           mask, idx, val);
> +       val = ac_build_ddxy(&ctx->ac, mask, idx, val);
>         emit_data->output[emit_data->chan] = val;
>  }
>
>  /*
>   * this takes an I,J coordinate pair,
>   * and works out the X and Y derivatives.
>   * it returns DDX(I), DDX(J), DDY(I), DDY(J).
>   */
>  static LLVMValueRef si_llvm_emit_ddxy_interp(
>         struct lp_build_tgsi_context *bld_base,
> --
> 2.11.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to