On Friday, April 6, 2018 7:31:41 AM PDT Lionel Landwerlin wrote: > v2: condition the extension on context isolation support from the > kernel (Chris) > > v3: (Lionel) > > The initial version of this change used a feature of the Gen7+ > command parser to turn the primitive instructions into no-ops. > Unfortunately this doesn't play well with how we're using the > hardware outside of the user submitted commands. For example > resolves are implicit operations which should not be turned into > no-ops as part of the previously submitted commands (before > blackhole_render is enabled) might not be disabled. For example > this sequence : > > glClear(); > glEnable(GL_BLACKHOLE_RENDER_INTEL); > glDrawArrays(...); > glReadPixels(...); > glDisable(GL_BLACKHOLE_RENDER_INTEL); > > While clear has been emitted outside the blackhole render, it > should still be resolved properly in the read pixels. Hence we > need to be more selective and only disable user submitted > commands. > > This v3 manually turns primitives into MI_NOOP if blackhole render > is enabled. This lets us enable this feature on any platform. > > Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com> > --- > src/mesa/drivers/dri/i965/brw_compute.c | 46 +++++++++++--------- > src/mesa/drivers/dri/i965/brw_defines.h | 8 +++- > src/mesa/drivers/dri/i965/brw_draw.c | 20 ++++++--- > src/mesa/drivers/dri/i965/intel_extensions.c | 1 + > 4 files changed, 49 insertions(+), 26 deletions(-)
This is going to run into trouble on some platforms. For example, on Ivybridge, I believe you're only allowed to emit 3DSTATE_CONSTANT_* once per 3DPRIMITIVE command. With this, we turn those 3DPRIMITIVEs into a series of MI_NOOPs, which means that we might get multiple sets of 3DSTATE_CONSTANTs before a real draw. This will likely cause a GPU hang. I want to say there are a couple of other touchy commands on older generations as well, but I can't think of what they are offhand. Would it work to emit a null primitive, i.e. a 3DPRIMITIVE with 0 vertices? That would still do any constant committing and flushing, but should avoid any EU-related work. > diff --git a/src/mesa/drivers/dri/i965/brw_compute.c > b/src/mesa/drivers/dri/i965/brw_compute.c > index 5ce899bcbcc..a368e5fb2c6 100644 > --- a/src/mesa/drivers/dri/i965/brw_compute.c > +++ b/src/mesa/drivers/dri/i965/brw_compute.c > @@ -131,29 +131,35 @@ brw_emit_gpgpu_walker(struct brw_context *brw) > if (right_non_aligned != 0) > right_mask >>= (simd_size - right_non_aligned); > > + struct gl_context *ctx = &brw->ctx; > uint32_t dwords = devinfo->gen < 8 ? 
11 : 15; > BEGIN_BATCH(dwords); > - OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag); > - OUT_BATCH(0); > - if (devinfo->gen >= 8) { > - OUT_BATCH(0); /* Indirect Data Length */ > - OUT_BATCH(0); /* Indirect Data Start Address */ > + if (ctx->IntelBlackholeRender) { > + for (uint32_t d = 0; d < dwords; d++) > + OUT_BATCH(MI_NOOP); > + } else { > + OUT_BATCH(GPGPU_WALKER << 16 | (dwords - 2) | indirect_flag); > + OUT_BATCH(0); > + if (devinfo->gen >= 8) { > + OUT_BATCH(0); /* Indirect Data Length */ > + OUT_BATCH(0); /* Indirect Data Start Address */ > + } > + assert(thread_width_max <= brw->screen->devinfo.max_cs_threads); > + OUT_BATCH(SET_FIELD(simd_size / 16, GPGPU_WALKER_SIMD_SIZE) | > + SET_FIELD(thread_width_max - 1, > GPGPU_WALKER_THREAD_WIDTH_MAX)); > + OUT_BATCH(0); /* Thread Group ID Starting X */ > + if (devinfo->gen >= 8) > + OUT_BATCH(0); /* MBZ */ > + OUT_BATCH(num_groups[0]); /* Thread Group ID X Dimension */ > + OUT_BATCH(0); /* Thread Group ID Starting Y */ > + if (devinfo->gen >= 8) > + OUT_BATCH(0); /* MBZ */ > + OUT_BATCH(num_groups[1]); /* Thread Group ID Y Dimension */ > + OUT_BATCH(0); /* Thread Group ID > Starting/Resume Z */ > + OUT_BATCH(num_groups[2]); /* Thread Group ID Z Dimension */ > + OUT_BATCH(right_mask); /* Right Execution Mask */ > + OUT_BATCH(0xffffffff); /* Bottom Execution Mask */ > } > - assert(thread_width_max <= brw->screen->devinfo.max_cs_threads); > - OUT_BATCH(SET_FIELD(simd_size / 16, GPGPU_WALKER_SIMD_SIZE) | > - SET_FIELD(thread_width_max - 1, GPGPU_WALKER_THREAD_WIDTH_MAX)); > - OUT_BATCH(0); /* Thread Group ID Starting X */ > - if (devinfo->gen >= 8) > - OUT_BATCH(0); /* MBZ */ > - OUT_BATCH(num_groups[0]); /* Thread Group ID X Dimension */ > - OUT_BATCH(0); /* Thread Group ID Starting Y */ > - if (devinfo->gen >= 8) > - OUT_BATCH(0); /* MBZ */ > - OUT_BATCH(num_groups[1]); /* Thread Group ID Y Dimension */ > - OUT_BATCH(0); /* Thread Group ID Starting/Resume Z > */ > - OUT_BATCH(num_groups[2]); /* 
Thread Group ID Z Dimension */ > - OUT_BATCH(right_mask); /* Right Execution Mask */ > - OUT_BATCH(0xffffffff); /* Bottom Execution Mask */ > ADVANCE_BATCH(); > > BEGIN_BATCH(2); > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index 8bf6f68b67c..c8a597c8ad0 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -1650,11 +1650,17 @@ enum brw_pixel_shader_coverage_mask_mode { > #define GEN10_CACHE_MODE_SS 0x0e420 > #define GEN10_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4) > > -#define INSTPM 0x20c0 > +#define INSTPM 0x20c0 /* Gen6-8 */ > # define INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 6) > +# define INSTPM_GLOBAL_DEBUG_ENABLE (1 << 4) > +# define INSTPM_MEDIA_INSTRUCTION_DISABLE (1 << 3) > +# define INSTPM_3D_RENDERER_INSTRUCTION_DISABLE (1 << 2) > +# define INSTPM_3D_STATE_INSTRUCTION_DISABLE (1 << 1) > > #define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */ > # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4) > +# define CSDBG2_MEDIA_INSTRUCTION_DISABLE (1 << 1) > +# define CSDBG2_3D_RENDERER_INSTRUCTION_DISABLE (1 << 0) > > #define SLICE_COMMON_ECO_CHICKEN1 0x731c /* Gen9+ */ > # define GLK_SCEC_BARRIER_MODE_GPGPU (0 << 7) > diff --git a/src/mesa/drivers/dri/i965/brw_draw.c > b/src/mesa/drivers/dri/i965/brw_draw.c > index 4caaadd560d..9d44f2b6026 100644 > --- a/src/mesa/drivers/dri/i965/brw_draw.c > +++ b/src/mesa/drivers/dri/i965/brw_draw.c > @@ -250,6 +250,7 @@ brw_emit_prim(struct brw_context *brw, > indirect_flag = 0; > } > > + struct gl_context *ctx = &brw->ctx; > BEGIN_BATCH(devinfo->gen >= 7 ? 7 : 6); > > if (devinfo->gen >= 7) { > @@ -257,12 +258,21 @@ brw_emit_prim(struct brw_context *brw, > (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) > ? 
GEN7_3DPRIM_PREDICATE_ENABLE : 0; > > - OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | > predicate_enable); > - OUT_BATCH(hw_prim | vertex_access_type); > + if (ctx->IntelBlackholeRender) { > + OUT_BATCH(MI_NOOP); > + OUT_BATCH(MI_NOOP); > + } else { > + OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | > predicate_enable); > + OUT_BATCH(hw_prim | vertex_access_type); > + } > } else { > - OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | > - hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | > - vertex_access_type); > + if (ctx->IntelBlackholeRender) { > + OUT_BATCH(MI_NOOP); > + } else { > + OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | > + hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | > + vertex_access_type); > + } > } > OUT_BATCH(verts_per_instance); > OUT_BATCH(start_vertex_location); > diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c > b/src/mesa/drivers/dri/i965/intel_extensions.c > index 73a6c73f537..171dc05fe24 100644 > --- a/src/mesa/drivers/dri/i965/intel_extensions.c > +++ b/src/mesa/drivers/dri/i965/intel_extensions.c > @@ -120,6 +120,7 @@ intelInitExtensions(struct gl_context *ctx) > ctx->Extensions.APPLE_object_purgeable = true; > ctx->Extensions.ATI_separate_stencil = true; > ctx->Extensions.ATI_texture_env_combine3 = true; > + ctx->Extensions.INTEL_blackhole_render = true; > ctx->Extensions.MESA_pack_invert = true; > ctx->Extensions.NV_conditional_render = true; > ctx->Extensions.NV_primitive_restart = true; >
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev