On Thu, Nov 01, 2018 at 08:04:20PM -0700, Kenneth Graunke wrote:
> While this does add a bunch of boilerplate, it also protects us against
> the hardware moving bits, or changing their meaning.  For something as
> finnicky as PIPE_CONTROL, the extra safety seems worth it.
> 
> We turn PIPE_CONTROL_* into an bitfield of arbitrary flags, and then
> pack them appropriately.

This is clearly better than before in my opinion. Few suggestions below but
patches 1-3 are:

Reviewed-by: Topi Pohjolainen <topi.pohjolai...@intel.com>

> ---
>  src/mesa/drivers/dri/i965/Makefile.sources    |   9 +
>  src/mesa/drivers/dri/i965/brw_context.h       |   4 +
>  src/mesa/drivers/dri/i965/brw_pipe_control.c  | 240 +++--------------
>  src/mesa/drivers/dri/i965/brw_pipe_control.h  |  58 +++--
>  src/mesa/drivers/dri/i965/brw_state.h         |  31 +++
>  src/mesa/drivers/dri/i965/genX_pipe_control.c | 243 ++++++++++++++++++
>  src/mesa/drivers/dri/i965/meson.build         |   4 +-
>  7 files changed, 359 insertions(+), 230 deletions(-)
>  create mode 100644 src/mesa/drivers/dri/i965/genX_pipe_control.c
> 
> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
> b/src/mesa/drivers/dri/i965/Makefile.sources
> index 63fa7b886f2..e4eb0339e09 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.sources
> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> @@ -127,46 +127,55 @@ intel_tiled_memcpy_dep_FILES = \
>  i965_gen4_FILES = \
>       genX_blorp_exec.c \
>       genX_boilerplate.h \
> +     genX_pipe_control.c \
>       genX_state_upload.c
>  
>  i965_gen45_FILES = \
>       genX_blorp_exec.c \
>       genX_boilerplate.h \
> +     genX_pipe_control.c \
>       genX_state_upload.c
>  
>  i965_gen5_FILES = \
>       genX_blorp_exec.c \
>       genX_boilerplate.h \
> +     genX_pipe_control.c \
>       genX_state_upload.c
>  
>  i965_gen6_FILES = \
>       genX_blorp_exec.c \
>       genX_boilerplate.h \
> +     genX_pipe_control.c \
>       genX_state_upload.c
>  
>  i965_gen7_FILES = \
>       genX_blorp_exec.c \
>       genX_boilerplate.h \
> +     genX_pipe_control.c \
>       genX_state_upload.c
>  
>  i965_gen75_FILES = \
>       genX_blorp_exec.c \
>       genX_boilerplate.h \
> +     genX_pipe_control.c \
>       genX_state_upload.c
>  
>  i965_gen8_FILES = \
>       genX_blorp_exec.c \
>       genX_boilerplate.h \
> +     genX_pipe_control.c \
>       genX_state_upload.c
>  
>  i965_gen9_FILES = \
>       genX_blorp_exec.c \
>       genX_boilerplate.h \
> +     genX_pipe_control.c \
>       genX_state_upload.c
>  
>  i965_gen10_FILES = \
>       genX_blorp_exec.c \
>       genX_boilerplate.h \
> +     genX_pipe_control.c \
>       genX_state_upload.c
>  
>  i965_gen11_FILES = \
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 7fd15669eb9..fe75425854c 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -752,6 +752,10 @@ struct brw_context
>                                          struct brw_bo *bo,
>                                          uint32_t offset_in_bytes,
>                                          uint32_t report_id);
> +
> +      void (*emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
> +                                    struct brw_bo *bo, uint32_t offset,
> +                                    uint64_t imm);
>     } vtbl;
>  
>     struct brw_bufmgr *bufmgr;
> diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
> b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> index 4d76e5dc9b7..cf9cc35875f 100644
> --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
> +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> @@ -23,200 +23,10 @@
>  
>  #include "brw_context.h"
>  #include "brw_defines.h"
> +#include "brw_state.h"
>  #include "intel_batchbuffer.h"
>  #include "intel_fbo.h"
>  
> -/**
> - * According to the latest documentation, any PIPE_CONTROL with the
> - * "Command Streamer Stall" bit set must also have another bit set,
> - * with five different options:
> - *
> - *  - Render Target Cache Flush
> - *  - Depth Cache Flush
> - *  - Stall at Pixel Scoreboard
> - *  - Post-Sync Operation
> - *  - Depth Stall
> - *  - DC Flush Enable
> - *
> - * I chose "Stall at Pixel Scoreboard" since we've used it effectively
> - * in the past, but the choice is fairly arbitrary.
> - */
> -static void
> -gen8_add_cs_stall_workaround_bits(uint32_t *flags)
> -{
> -   uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
> -                      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> -                      PIPE_CONTROL_WRITE_IMMEDIATE |
> -                      PIPE_CONTROL_WRITE_DEPTH_COUNT |
> -                      PIPE_CONTROL_WRITE_TIMESTAMP |
> -                      PIPE_CONTROL_STALL_AT_SCOREBOARD |
> -                      PIPE_CONTROL_DEPTH_STALL |
> -                      PIPE_CONTROL_DATA_CACHE_FLUSH;
> -
> -   /* If we're doing a CS stall, and don't already have one of the
> -    * workaround bits set, add "Stall at Pixel Scoreboard."
> -    */
> -   if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
> -      *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
> -}
> -
> -/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
> - *
> - * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
> - *  only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
> - *
> - * Note that the kernel does CS stalls between batches, so we only need
> - * to count them within a batch.
> - */
> -static uint32_t
> -gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t 
> flags)
> -{
> -   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> -
> -   if (devinfo->gen == 7 && !devinfo->is_haswell) {
> -      if (flags & PIPE_CONTROL_CS_STALL) {
> -         /* If we're doing a CS stall, reset the counter and carry on. */
> -         brw->pipe_controls_since_last_cs_stall = 0;
> -         return 0;
> -      }
> -
> -      /* If this is the fourth pipe control without a CS stall, do one now. 
> */
> -      if (++brw->pipe_controls_since_last_cs_stall == 4) {
> -         brw->pipe_controls_since_last_cs_stall = 0;
> -         return PIPE_CONTROL_CS_STALL;
> -      }
> -   }
> -   return 0;
> -}
> -
> -/* #1130 from gen10 workarounds page in h/w specs:
> - * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is
> - *  not enabled in same PIPE CONTROL and Enable Pixel score board stall if
> - *  Render target cache flush is enabled."
> - *
> - * Applicable to CNL B0 and C0 steppings only.
> - */
> -static void
> -gen10_add_rcpfe_workaround_bits(uint32_t *flags)
> -{
> -   if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) {
> -      *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD;
> -   } else if (*flags &
> -             (PIPE_CONTROL_WRITE_IMMEDIATE |
> -              PIPE_CONTROL_WRITE_DEPTH_COUNT |
> -              PIPE_CONTROL_WRITE_TIMESTAMP)) {
> -      *flags = *flags | PIPE_CONTROL_DEPTH_STALL;
> -   }
> -}
> -
> -static void
> -brw_emit_pipe_control(struct brw_context *brw, uint32_t flags,
> -                      struct brw_bo *bo, uint32_t offset, uint64_t imm)
> -{
> -   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> -
> -   if (devinfo->gen >= 8) {
> -      if (devinfo->gen == 8)
> -         gen8_add_cs_stall_workaround_bits(&flags);
> -
> -      if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
> -         if (devinfo->gen == 9) {
> -            /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit 
> description
> -             * lists several workarounds:
> -             *
> -             *    "Project: SKL, KBL, BXT
> -             *
> -             *     If the VF Cache Invalidation Enable is set to a 1 in a
> -             *     PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
> -             *     sets to 0, with the VF Cache Invalidation Enable set to 0
> -             *     needs to be sent prior to the PIPE_CONTROL with VF Cache
> -             *     Invalidation Enable set to a 1."
> -             */
> -            brw_emit_pipe_control_flush(brw, 0);
> -         }
> -
> -         if (devinfo->gen >= 9) {
> -            /* THE PIPE_CONTROL "VF Cache Invalidation Enable" docs continue:
> -             *
> -             *    "Project: BDW+
> -             *
> -             *     When VF Cache Invalidate is set “Post Sync Operation” must
> -             *     be enabled to “Write Immediate Data” or “Write PS Depth
> -             *     Count” or “Write Timestamp”."
> -             *
> -             * If there's a BO, we're already doing some kind of write.
> -             * If not, add a write to the workaround BO.
> -             *
> -             * XXX: This causes GPU hangs on Broadwell, so restrict it to
> -             *      Gen9+ for now...see this bug for more information:
> -             *      https://bugs.freedesktop.org/show_bug.cgi?id=103787
> -             */
> -            if (!bo) {
> -               flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
> -               bo = brw->workaround_bo;
> -            }
> -         }
> -      }
> -
> -      if (devinfo->gen == 10)
> -         gen10_add_rcpfe_workaround_bits(&flags);
> -
> -      BEGIN_BATCH(6);
> -      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
> -      OUT_BATCH(flags);
> -      if (bo) {
> -         OUT_RELOC64(bo, RELOC_WRITE, offset);
> -      } else {
> -         OUT_BATCH(0);
> -         OUT_BATCH(0);
> -      }
> -      OUT_BATCH(imm);
> -      OUT_BATCH(imm >> 32);
> -      ADVANCE_BATCH();
> -   } else if (devinfo->gen >= 6) {
> -      if (devinfo->gen == 6 &&
> -          (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
> -         /* Hardware workaround: SNB B-Spec says:
> -          *
> -          *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
> -          *   Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
> -          *   required.
> -          */
> -         brw_emit_post_sync_nonzero_flush(brw);
> -      }
> -
> -      flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
> -
> -      /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24
> -       * on later platforms.  We always use PPGTT on Gen7+.
> -       */
> -      unsigned gen6_gtt = devinfo->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE 
> : 0;
> -
> -      BEGIN_BATCH(5);
> -      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
> -      OUT_BATCH(flags);
> -      if (bo) {
> -         OUT_RELOC(bo, RELOC_WRITE | RELOC_NEEDS_GGTT, gen6_gtt | offset);
> -      } else {
> -         OUT_BATCH(0);
> -      }
> -      OUT_BATCH(imm);
> -      OUT_BATCH(imm >> 32);
> -      ADVANCE_BATCH();
> -   } else {
> -      BEGIN_BATCH(4);
> -      OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
> -      if (bo) {
> -         OUT_RELOC(bo, RELOC_WRITE, PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
> -      } else {
> -         OUT_BATCH(0);
> -      }
> -      OUT_BATCH(imm);
> -      OUT_BATCH(imm >> 32);
> -      ADVANCE_BATCH();
> -   }
> -}
> -
>  /**
>   * Emit a PIPE_CONTROL with various flushing flags.
>   *
> @@ -246,7 +56,7 @@ brw_emit_pipe_control_flush(struct brw_context *brw, 
> uint32_t flags)
>        flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
>     }
>  
> -   brw_emit_pipe_control(brw, flags, NULL, 0, 0);
> +   brw->vtbl.emit_raw_pipe_control(brw, flags, NULL, 0, 0);
>  }
>  
>  /**
> @@ -262,7 +72,7 @@ brw_emit_pipe_control_write(struct brw_context *brw, 
> uint32_t flags,
>                              struct brw_bo *bo, uint32_t offset,
>                              uint64_t imm)
>  {
> -   brw_emit_pipe_control(brw, flags, bo, offset, imm);
> +   brw->vtbl.emit_raw_pipe_control(brw, flags, bo, offset, imm);
>  }
>  
>  /**
> @@ -357,14 +167,14 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw)
>  void
>  gen10_emit_isp_disable(struct brw_context *brw)
>  {
> -   brw_emit_pipe_control(brw,
> -                         PIPE_CONTROL_STALL_AT_SCOREBOARD |
> -                         PIPE_CONTROL_CS_STALL,
> -                         NULL, 0, 0);
> -   brw_emit_pipe_control(brw,
> -                         PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE |
> -                         PIPE_CONTROL_CS_STALL,
> -                         NULL, 0, 0);
> +   brw->vtbl.emit_raw_pipe_control(brw,
> +                                   PIPE_CONTROL_STALL_AT_SCOREBOARD |
> +                                   PIPE_CONTROL_CS_STALL,
> +                                   NULL, 0, 0);
> +   brw->vtbl.emit_raw_pipe_control(brw,
> +                                   
> PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE |
> +                                   PIPE_CONTROL_CS_STALL,
> +                                   NULL, 0, 0);
>  
>     brw->vs.base.push_constants_dirty = true;
>     brw->tcs.base.push_constants_dirty = true;
> @@ -561,6 +371,34 @@ int
>  brw_init_pipe_control(struct brw_context *brw,
>                        const struct gen_device_info *devinfo)
>  {
> +   switch (devinfo->gen) {
> +   case 10:
> +      brw->vtbl.emit_raw_pipe_control = gen10_emit_raw_pipe_control;
> +      break;
> +   case 9:
> +      brw->vtbl.emit_raw_pipe_control = gen9_emit_raw_pipe_control;
> +      break;
> +   case 8:
> +      brw->vtbl.emit_raw_pipe_control = gen8_emit_raw_pipe_control;
> +      break;
> +   case 7:
> +      brw->vtbl.emit_raw_pipe_control =
> +         devinfo->is_haswell ? gen75_emit_raw_pipe_control
> +                             : gen7_emit_raw_pipe_control;
> +      break;
> +   case 6:
> +      brw->vtbl.emit_raw_pipe_control = gen6_emit_raw_pipe_control;
> +      break;
> +   case 5:
> +      brw->vtbl.emit_raw_pipe_control = gen5_emit_raw_pipe_control;
> +      break;
> +   case 4:
> +      brw->vtbl.emit_raw_pipe_control =
> +         devinfo->is_g4x ? gen45_emit_raw_pipe_control
> +                         : gen4_emit_raw_pipe_control;
> +      break;
> +   }
> +
>     if (devinfo->gen < 6)
>        return 0;
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.h 
> b/src/mesa/drivers/dri/i965/brw_pipe_control.h
> index 69b1c7c31e6..e213f43a4f7 100644
> --- a/src/mesa/drivers/dri/i965/brw_pipe_control.h
> +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.h
> @@ -32,34 +32,38 @@ struct brw_bo;
>   *
>   * PIPE_CONTROL operation, a combination MI_FLUSH and register write with
>   * additional flushing control.
> + *
> + * The bits here are not the actual hardware values.  The actual values
> + * shift around a bit per-generation, so we just have flags for each
> + * potential operation, and use genxml to encode the actual packet.
>   */
> -#define _3DSTATE_PIPE_CONTROL                (CMD_3D | (3 << 27) | (2 << 24))
> -#define PIPE_CONTROL_LRI_WRITE_IMMEDIATE (1 << 23) /* Gen7+ */
> -#define PIPE_CONTROL_CS_STALL                (1 << 20)
> -#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET     (1 << 19)
> -#define PIPE_CONTROL_TLB_INVALIDATE  (1 << 18)
> -#define PIPE_CONTROL_SYNC_GFDT               (1 << 17)
> -#define PIPE_CONTROL_MEDIA_STATE_CLEAR       (1 << 16)
> -#define PIPE_CONTROL_NO_WRITE                (0 << 14)
> -#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14)
> -#define PIPE_CONTROL_WRITE_DEPTH_COUNT       (2 << 14)
> -#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14)
> -#define PIPE_CONTROL_DEPTH_STALL     (1 << 13)
> -#define PIPE_CONTROL_RENDER_TARGET_FLUSH (1 << 12)
> -#define PIPE_CONTROL_INSTRUCTION_INVALIDATE (1 << 11)
> -#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE        (1 << 10) /* GM45+ only 
> */
> -#define PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE (1 << 9)
> -#define PIPE_CONTROL_INTERRUPT_ENABLE        (1 << 8)
> -#define PIPE_CONTROL_FLUSH_ENABLE    (1 << 7) /* Gen7+ only */
> -/* GT */
> -#define PIPE_CONTROL_DATA_CACHE_FLUSH        (1 << 5)
> -#define PIPE_CONTROL_VF_CACHE_INVALIDATE     (1 << 4)
> -#define PIPE_CONTROL_CONST_CACHE_INVALIDATE  (1 << 3)
> -#define PIPE_CONTROL_STATE_CACHE_INVALIDATE  (1 << 2)
> -#define PIPE_CONTROL_STALL_AT_SCOREBOARD     (1 << 1)
> -#define PIPE_CONTROL_DEPTH_CACHE_FLUSH               (1 << 0)
> -#define PIPE_CONTROL_PPGTT_WRITE     (0 << 2)
> -#define PIPE_CONTROL_GLOBAL_GTT_WRITE        (1 << 2)
> +enum pipe_control_flags
> +{
> +   PIPE_CONTROL_FLUSH_LLC                       = (1 << 1),
> +   PIPE_CONTROL_LRI_POST_SYNC_OP                = (1 << 2),
> +   PIPE_CONTROL_STORE_DATA_INDEX                = (1 << 3),
> +   PIPE_CONTROL_CS_STALL                        = (1 << 4),
> +   PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET     = (1 << 5),
> +   PIPE_CONTROL_SYNC_GFDT                       = (1 << 6),
> +   PIPE_CONTROL_TLB_INVALIDATE                  = (1 << 7),
> +   PIPE_CONTROL_MEDIA_STATE_CLEAR               = (1 << 8),
> +   PIPE_CONTROL_WRITE_IMMEDIATE                 = (1 << 9),
> +   PIPE_CONTROL_WRITE_DEPTH_COUNT               = (1 << 10),
> +   PIPE_CONTROL_WRITE_TIMESTAMP                 = (1 << 11),
> +   PIPE_CONTROL_DEPTH_STALL                     = (1 << 12),
> +   PIPE_CONTROL_RENDER_TARGET_FLUSH             = (1 << 13),
> +   PIPE_CONTROL_INSTRUCTION_INVALIDATE          = (1 << 14),
> +   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE        = (1 << 15),
> +   PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE = (1 << 16),
> +   PIPE_CONTROL_NOTIFY_ENABLE                   = (1 << 17),
> +   PIPE_CONTROL_FLUSH_ENABLE                    = (1 << 18),
> +   PIPE_CONTROL_DATA_CACHE_FLUSH                = (1 << 19),
> +   PIPE_CONTROL_VF_CACHE_INVALIDATE             = (1 << 20),
> +   PIPE_CONTROL_CONST_CACHE_INVALIDATE          = (1 << 21),
> +   PIPE_CONTROL_STATE_CACHE_INVALIDATE          = (1 << 22),
> +   PIPE_CONTROL_STALL_AT_SCOREBOARD             = (1 << 23),
> +   PIPE_CONTROL_DEPTH_CACHE_FLUSH               = (1 << 24),
> +};
>  
>  #define PIPE_CONTROL_CACHE_FLUSH_BITS \
>     (PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DATA_CACHE_FLUSH | \
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
> b/src/mesa/drivers/dri/i965/brw_state.h
> index f6acf81b899..b62890729fa 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -95,6 +95,37 @@ extern const struct brw_tracked_state gen7_urb;
>  extern const struct brw_tracked_state gen8_pma_fix;
>  extern const struct brw_tracked_state brw_cs_work_groups_surface;
>  
> +void gen4_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> +                                struct brw_bo *bo, uint32_t offset,
> +                                uint64_t imm);
> +void gen45_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> +                                 struct brw_bo *bo, uint32_t offset,
> +                                 uint64_t imm);
> +void gen5_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> +                                struct brw_bo *bo, uint32_t offset,
> +                                uint64_t imm);
> +void gen6_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> +                                struct brw_bo *bo, uint32_t offset,
> +                                uint64_t imm);
> +void gen7_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> +                                struct brw_bo *bo, uint32_t offset,
> +                                uint64_t imm);
> +void gen75_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> +                                 struct brw_bo *bo, uint32_t offset,
> +                                 uint64_t imm);
> +void gen8_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> +                                struct brw_bo *bo, uint32_t offset,
> +                                uint64_t imm);
> +void gen9_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> +                                struct brw_bo *bo, uint32_t offset,
> +                                uint64_t imm);
> +void gen10_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> +                                 struct brw_bo *bo, uint32_t offset,
> +                                 uint64_t imm);
> +void gen11_emit_raw_pipe_control(struct brw_context *brw, uint32_t flags,
> +                                 struct brw_bo *bo, uint32_t offset,
> +                                 uint64_t imm);
> +
>  static inline bool
>  brw_state_dirty(const struct brw_context *brw,
>                  GLuint mesa_flags, uint64_t brw_flags)
> diff --git a/src/mesa/drivers/dri/i965/genX_pipe_control.c 
> b/src/mesa/drivers/dri/i965/genX_pipe_control.c
> new file mode 100644
> index 00000000000..8eb37444253
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/genX_pipe_control.c
> @@ -0,0 +1,243 @@
> +/*
> + * Copyright © 2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "genX_boilerplate.h"
> +#include "brw_defines.h"
> +#include "brw_state.h"
> +
> +/**
> + * According to the latest documentation, any PIPE_CONTROL with the
> + * "Command Streamer Stall" bit set must also have another bit set,
> + * with five different options:
> + *
> + *  - Render Target Cache Flush
> + *  - Depth Cache Flush
> + *  - Stall at Pixel Scoreboard
> + *  - Post-Sync Operation
> + *  - Depth Stall
> + *  - DC Flush Enable
> + *
> + * I chose "Stall at Pixel Scoreboard" since we've used it effectively
> + * in the past, but the choice is fairly arbitrary.
> + */
> +static void
> +gen8_add_cs_stall_workaround_bits(uint32_t *flags)
> +{
> +   uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
> +                      PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> +                      PIPE_CONTROL_WRITE_IMMEDIATE |
> +                      PIPE_CONTROL_WRITE_DEPTH_COUNT |
> +                      PIPE_CONTROL_WRITE_TIMESTAMP |
> +                      PIPE_CONTROL_STALL_AT_SCOREBOARD |
> +                      PIPE_CONTROL_DEPTH_STALL |
> +                      PIPE_CONTROL_DATA_CACHE_FLUSH;
> +
> +   /* If we're doing a CS stall, and don't already have one of the
> +    * workaround bits set, add "Stall at Pixel Scoreboard."
> +    */
> +   if ((*flags & PIPE_CONTROL_CS_STALL) != 0 && (*flags & wa_bits) == 0)
> +      *flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
> +}
> +
> +/* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
> + *
> + * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
> + *  only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
> + *
> + * Note that the kernel does CS stalls between batches, so we only need
> + * to count them within a batch.
> + */
> +static uint32_t
> +gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t 
> flags)
> +{
> +   if (GEN_GEN == 7 && !GEN_IS_HASWELL) {
> +      if (flags & PIPE_CONTROL_CS_STALL) {
> +         /* If we're doing a CS stall, reset the counter and carry on. */
> +         brw->pipe_controls_since_last_cs_stall = 0;
> +         return 0;
> +      }
> +
> +      /* If this is the fourth pipe control without a CS stall, do one now. 
> */
> +      if (++brw->pipe_controls_since_last_cs_stall == 4) {
> +         brw->pipe_controls_since_last_cs_stall = 0;
> +         return PIPE_CONTROL_CS_STALL;
> +      }
> +   }
> +   return 0;
> +}
> +
> +/* #1130 from gen10 workarounds page in h/w specs:
> + * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is
> + *  not enabled in same PIPE CONTROL and Enable Pixel score board stall if
> + *  Render target cache flush is enabled."
> + *
> + * Applicable to CNL B0 and C0 steppings only.
> + */
> +static void
> +gen10_add_rcpfe_workaround_bits(uint32_t *flags)
> +{
> +   if (*flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) {
> +      *flags = *flags | PIPE_CONTROL_STALL_AT_SCOREBOARD;
> +   } else if (*flags &
> +             (PIPE_CONTROL_WRITE_IMMEDIATE |
> +              PIPE_CONTROL_WRITE_DEPTH_COUNT |
> +              PIPE_CONTROL_WRITE_TIMESTAMP)) {
> +      *flags = *flags | PIPE_CONTROL_DEPTH_STALL;
> +   }
> +}
> +
> +static unsigned
> +flags_to_post_sync_op(uint32_t flags)
> +{
> +   flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
> +            PIPE_CONTROL_WRITE_DEPTH_COUNT |
> +            PIPE_CONTROL_WRITE_TIMESTAMP;
> +
> +   assert(util_bitcount(flags) <= 1);
> +
> +   if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
> +      return WriteImmediateData;
> +
> +   if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
> +      return WritePSDepthCount;
> +
> +   if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
> +      return WriteTimestamp;
> +
> +   return 0;
> +}
> +
> +void
> +genX(emit_raw_pipe_control)(struct brw_context *brw, uint32_t flags,
> +                            struct brw_bo *bo, uint32_t offset, uint64_t imm)
> +{
> +   if (GEN_GEN >= 8) {
> +      if (GEN_GEN == 8)
> +         gen8_add_cs_stall_workaround_bits(&flags);
> +
> +      if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
> +         if (GEN_GEN == 9) {
> +            /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit 
> description
> +             * lists several workarounds:
> +             *
> +             *    "Project: SKL, KBL, BXT
> +             *
> +             *     If the VF Cache Invalidation Enable is set to a 1 in a
> +             *     PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
> +             *     sets to 0, with the VF Cache Invalidation Enable set to 0
> +             *     needs to be sent prior to the PIPE_CONTROL with VF Cache
> +             *     Invalidation Enable set to a 1."
> +             */
> +            brw_emit_pipe_control_flush(brw, 0);
> +         }
> +
> +         if (GEN_GEN >= 9) {
> +            /* THE PIPE_CONTROL "VF Cache Invalidation Enable" docs continue:
> +             *
> +             *    "Project: BDW+
> +             *
> +             *     When VF Cache Invalidate is set “Post Sync Operation” must
> +             *     be enabled to “Write Immediate Data” or “Write PS Depth
> +             *     Count” or “Write Timestamp”."
> +             *
> +             * If there's a BO, we're already doing some kind of write.
> +             * If not, add a write to the workaround BO.
> +             *
> +             * XXX: This causes GPU hangs on Broadwell, so restrict it to
> +             *      Gen9+ for now...see this bug for more information:
> +             *      https://bugs.freedesktop.org/show_bug.cgi?id=103787
> +             */
> +            if (!bo) {
> +               flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
> +               bo = brw->workaround_bo;
> +            }
> +         }
> +      }
> +
> +      if (GEN_GEN == 10)
> +         gen10_add_rcpfe_workaround_bits(&flags);
> +   } else if (GEN_GEN >= 6) {
> +      if (GEN_GEN == 6 &&
> +          (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
> +         /* Hardware workaround: SNB B-Spec says:
> +          *
> +          *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
> +          *   Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
> +          *   required.
> +          */
> +         brw_emit_post_sync_nonzero_flush(brw);
> +      }
> +
> +      flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
> +   }
> +
> +   brw_batch_emit(brw, GENX(PIPE_CONTROL), pc) {
> +   #if GEN_GEN >= 9
> +      pc.FlushLLC = 0;
> +   #endif
> +   #if GEN_GEN >= 7
> +      pc.LRIPostSyncOperation = NoLRIOperation;
> +      pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
> +      pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
> +   #endif
> +   #if GEN_GEN >= 6
> +      pc.StoreDataIndex = 0;
> +      pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
> +      pc.GlobalSnapshotCountReset =
> +         flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
> +      pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;

While comparing this against src/intel/genxml/gen*.xml I noticed that gen6
also has:

<field name="Synchronize GFDT Surface" start="49" end="49" type="bool"/>

Should we add it for consistency?

> +      pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
> +      pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;

gen5 also has:

<field name="Stall At Pixel Scoreboard" start="33" end="33" type="bool"/>

We didn't have this before and therefore this patch is fine. I just thought
I write it down since I came across that.

> +      pc.RenderTargetCacheFlushEnable =
> +         flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
> +      pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
> +      pc.StateCacheInvalidationEnable =
> +         flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
> +      pc.VFCacheInvalidationEnable = flags & 
> PIPE_CONTROL_VF_CACHE_INVALIDATE;
> +      pc.ConstantCacheInvalidationEnable =
> +         flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
> +   #else
> +      pc.WriteCacheFlush = flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
> +   #endif
> +      pc.PostSyncOperation = flags_to_post_sync_op(flags);
> +      pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
> +      pc.InstructionCacheInvalidateEnable =
> +         flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
> +      pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
> +   #if GEN_GEN >= 5 || GEN_IS_G4X
> +      pc.IndirectStatePointersDisable =
> +         flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
> +   #endif
> +   #if GEN_GEN >= 6
> +      pc.TextureCacheInvalidationEnable =
> +         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
> +   #elif GEN_GEN == 5 || GEN_IS_G4X
> +      pc.TextureCacheFlushEnable =
> +         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;

gen5 also has:

<field name="Depth Cache Flush Inhibit" start="32" end="32" type="uint">

> +   #endif
> +      pc.Address = ggtt_bo(bo, offset);
> +      if (GEN_GEN < 7 && bo)
> +         pc.DestinationAddressType = DAT_GGTT;
> +      pc.ImmediateData = imm;
> +   }
> +}
> diff --git a/src/mesa/drivers/dri/i965/meson.build 
> b/src/mesa/drivers/dri/i965/meson.build
> index 02f5f3073f7..e7c890785e2 100644
> --- a/src/mesa/drivers/dri/i965/meson.build
> +++ b/src/mesa/drivers/dri/i965/meson.build
> @@ -147,8 +147,8 @@ i965_gen_libs = []
>  foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '100', '110']
>    i965_gen_libs += static_library(
>      'i965_gen@0@'.format(v),
> -    ['genX_blorp_exec.c', 'genX_boilerplate.h', 'genX_state_upload.c',
> -     gen_xml_pack],
> +    ['genX_blorp_exec.c', 'genX_boilerplate.h', 'genX_pipe_control.c',
> +     'genX_state_upload.c', gen_xml_pack],
>      include_directories : [inc_common, inc_intel, inc_dri_common],
>      c_args : [
>        c_vis_args, no_override_init_args, c_sse2_args,
> -- 
> 2.19.1
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to