On Friday, April 6, 2018 7:31:41 AM PDT Lionel Landwerlin wrote:
> v2: condition the extension on context isolation support from the
>     kernel (Chris)
> 
> v3: (Lionel)
> 
>     The initial version of this change used a feature of the Gen7+
>     command parser to turn the primitive instructions into no-ops.
>     Unfortunately this doesn't play well with how we're using the
>     hardware outside of the user submitted commands. For example,
>     resolves are implicit operations which should not be turned into
>     no-ops, as part of the previously submitted commands (before
>     blackhole_render is enabled) might not be disabled. For example,
>     this sequence:
> 
>        glClear();
>        glEnable(GL_BLACKHOLE_RENDER_INTEL);
>        glDrawArrays(...);
>        glReadPixels(...);
>        glDisable(GL_BLACKHOLE_RENDER_INTEL);
> 
>     While clear has been emitted outside the blackhole render, it
>     should still be resolved properly in the read pixels. Hence we
>     need to be more selective and only disable user submitted
>     commands.
> 
>     This v3 manually turns primitives into MI_NOOP if blackhole render
>     is enabled. This lets us enable this feature on any platform.
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
> ---
>  src/mesa/drivers/dri/i965/brw_compute.c      | 46 +++++++++++---------
>  src/mesa/drivers/dri/i965/brw_defines.h      |  8 +++-
>  src/mesa/drivers/dri/i965/brw_draw.c         | 20 ++++++---
>  src/mesa/drivers/dri/i965/intel_extensions.c |  1 +
>  4 files changed, 49 insertions(+), 26 deletions(-)

This is going to run into trouble on some platforms.  For example, on
Ivybridge, I believe you're only allowed to emit 3DSTATE_CONSTANT_* once
per 3DPRIMITIVE command.  With this, we turn those 3DPRIMITIVEs into a
series of MI_NOOPs, which means that we might get multiple sets of
3DSTATE_CONSTANTs before a real draw.  This will likely cause a GPU hang.

I want to say there are a couple other touchy commands on older
generations as well, but I can't think of what they are offhand.

Would it work to emit a null primitive, i.e. a 3DPRIMITIVE with 0
vertices?  That would still do any constant committing and flushing,
but should avoid any EU-related work.

> diff --git a/src/mesa/drivers/dri/i965/brw_compute.c 
> b/src/mesa/drivers/dri/i965/brw_compute.c
> index 5ce899bcbcc..a368e5fb2c6 100644
> --- a/src/mesa/drivers/dri/i965/brw_compute.c
> +++ b/src/mesa/drivers/dri/i965/brw_compute.c
> @@ -131,29 +131,35 @@ brw_emit_gpgpu_walker(struct brw_context *brw)
>     if (right_non_aligned != 0)
>        right_mask >>= (simd_size - right_non_aligned);
>  
> +   struct gl_context *ctx = &brw->ctx;
>     uint32_t dwords = devinfo->gen < 8 ? 11 : 15;
>     BEGIN_BATCH(dwords);
> -   OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag);
> -   OUT_BATCH(0);
> -   if (devinfo->gen >= 8) {
> -      OUT_BATCH(0);                     /* Indirect Data Length */
> -      OUT_BATCH(0);                     /* Indirect Data Start Address */
> +   if (ctx->IntelBlackholeRender) {
> +      for (uint32_t d = 0; d < dwords; d++)
> +         OUT_BATCH(MI_NOOP);
> +   } else {
> +      OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag);
> +      OUT_BATCH(0);
> +      if (devinfo->gen >= 8) {
> +         OUT_BATCH(0);                     /* Indirect Data Length */
> +         OUT_BATCH(0);                     /* Indirect Data Start Address */
> +      }
> +      assert(thread_width_max <= brw->screen->devinfo.max_cs_threads);
> +      OUT_BATCH(SET_FIELD(simd_size / 16, GPGPU_WALKER_SIMD_SIZE) |
> +                SET_FIELD(thread_width_max - 1, 
> GPGPU_WALKER_THREAD_WIDTH_MAX));
> +      OUT_BATCH(0);                        /* Thread Group ID Starting X */
> +      if (devinfo->gen >= 8)
> +         OUT_BATCH(0);                     /* MBZ */
> +      OUT_BATCH(num_groups[0]);            /* Thread Group ID X Dimension */
> +      OUT_BATCH(0);                        /* Thread Group ID Starting Y */
> +      if (devinfo->gen >= 8)
> +         OUT_BATCH(0);                     /* MBZ */
> +      OUT_BATCH(num_groups[1]);            /* Thread Group ID Y Dimension */
> +      OUT_BATCH(0);                        /* Thread Group ID 
> Starting/Resume Z */
> +      OUT_BATCH(num_groups[2]);            /* Thread Group ID Z Dimension */
> +      OUT_BATCH(right_mask);               /* Right Execution Mask */
> +      OUT_BATCH(0xffffffff);               /* Bottom Execution Mask */
>     }
> -   assert(thread_width_max <= brw->screen->devinfo.max_cs_threads);
> -   OUT_BATCH(SET_FIELD(simd_size / 16, GPGPU_WALKER_SIMD_SIZE) |
> -             SET_FIELD(thread_width_max - 1, GPGPU_WALKER_THREAD_WIDTH_MAX));
> -   OUT_BATCH(0);                        /* Thread Group ID Starting X */
> -   if (devinfo->gen >= 8)
> -      OUT_BATCH(0);                     /* MBZ */
> -   OUT_BATCH(num_groups[0]);            /* Thread Group ID X Dimension */
> -   OUT_BATCH(0);                        /* Thread Group ID Starting Y */
> -   if (devinfo->gen >= 8)
> -      OUT_BATCH(0);                     /* MBZ */
> -   OUT_BATCH(num_groups[1]);            /* Thread Group ID Y Dimension */
> -   OUT_BATCH(0);                        /* Thread Group ID Starting/Resume Z 
> */
> -   OUT_BATCH(num_groups[2]);            /* Thread Group ID Z Dimension */
> -   OUT_BATCH(right_mask);               /* Right Execution Mask */
> -   OUT_BATCH(0xffffffff);               /* Bottom Execution Mask */
>     ADVANCE_BATCH();
>  
>     BEGIN_BATCH(2);
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index 8bf6f68b67c..c8a597c8ad0 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1650,11 +1650,17 @@ enum brw_pixel_shader_coverage_mask_mode {
>  #define GEN10_CACHE_MODE_SS            0x0e420
>  #define GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
>  
> -#define INSTPM                             0x20c0
> +#define INSTPM                             0x20c0 /* Gen6-8 */
>  # define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6)
> +# define INSTPM_GLOBAL_DEBUG_ENABLE                    (1 << 4)
> +# define INSTPM_MEDIA_INSTRUCTION_DISABLE              (1 << 3)
> +# define INSTPM_3D_RENDERER_INSTRUCTION_DISABLE        (1 << 2)
> +# define INSTPM_3D_STATE_INSTRUCTION_DISABLE           (1 << 1)
>  
>  #define CS_DEBUG_MODE2                     0x20d8 /* Gen9+ */
>  # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
> +# define CSDBG2_MEDIA_INSTRUCTION_DISABLE              (1 << 1)
> +# define CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE        (1 << 0)
>  
>  #define SLICE_COMMON_ECO_CHICKEN1          0x731c /* Gen9+ */
>  # define GLK_SCEC_BARRIER_MODE_GPGPU       (0 << 7)
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
> b/src/mesa/drivers/dri/i965/brw_draw.c
> index 4caaadd560d..9d44f2b6026 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -250,6 +250,7 @@ brw_emit_prim(struct brw_context *brw,
>        indirect_flag = 0;
>     }
>  
> +   struct gl_context *ctx = &brw->ctx;
>     BEGIN_BATCH(devinfo->gen >= 7 ? 7 : 6);
>  
>     if (devinfo->gen >= 7) {
> @@ -257,12 +258,21 @@ brw_emit_prim(struct brw_context *brw,
>           (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT)
>           ? GEN7_3DPRIM_PREDICATE_ENABLE : 0;
>  
> -      OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | 
> predicate_enable);
> -      OUT_BATCH(hw_prim | vertex_access_type);
> +      if (ctx->IntelBlackholeRender) {
> +         OUT_BATCH(MI_NOOP);
> +         OUT_BATCH(MI_NOOP);
> +      } else {
> +         OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | 
> predicate_enable);
> +         OUT_BATCH(hw_prim | vertex_access_type);
> +      }
>     } else {
> -      OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
> -                hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
> -                vertex_access_type);
> +      if (ctx->IntelBlackholeRender) {
> +         OUT_BATCH(MI_NOOP);
> +      } else {
> +         OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
> +                   hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
> +                   vertex_access_type);
> +      }
>     }
>     OUT_BATCH(verts_per_instance);
>     OUT_BATCH(start_vertex_location);
> diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
> b/src/mesa/drivers/dri/i965/intel_extensions.c
> index 73a6c73f537..171dc05fe24 100644
> --- a/src/mesa/drivers/dri/i965/intel_extensions.c
> +++ b/src/mesa/drivers/dri/i965/intel_extensions.c
> @@ -120,6 +120,7 @@ intelInitExtensions(struct gl_context *ctx)
>     ctx->Extensions.APPLE_object_purgeable = true;
>     ctx->Extensions.ATI_separate_stencil = true;
>     ctx->Extensions.ATI_texture_env_combine3 = true;
> +   ctx->Extensions.INTEL_blackhole_render = true;
>     ctx->Extensions.MESA_pack_invert = true;
>     ctx->Extensions.NV_conditional_render = true;
>     ctx->Extensions.NV_primitive_restart = true;
> 

Attachment: signature.asc
Description: This is a digitally signed message part.

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to