On Fri, Aug 19, 2016 at 09:55:54AM -0700, Jason Ekstrand wrote: > --- > src/mesa/drivers/dri/i965/blorp.c | 66 > ----------------------------- > src/mesa/drivers/dri/i965/genX_blorp_exec.c | 66 > +++++++++++++++++++++++++++++ > src/mesa/drivers/dri/i965/genX_blorp_exec.h | 8 ++-- > 3 files changed, 70 insertions(+), 70 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/blorp.c > b/src/mesa/drivers/dri/i965/blorp.c > index dba3441..0688f6b 100644 > --- a/src/mesa/drivers/dri/i965/blorp.c > +++ b/src/mesa/drivers/dri/i965/blorp.c > @@ -347,28 +347,6 @@ brw_blorp_compile_nir_shader(struct brw_context *brw, > struct nir_shader *nir, > void > brw_blorp_exec(struct brw_context *brw, const struct brw_blorp_params > *params) > { > - struct gl_context *ctx = &brw->ctx; > - const uint32_t estimated_max_batch_usage = brw->gen >= 8 ? 1800 : 1500; > - bool check_aperture_failed_once = false; > - > - /* Flush the sampler and render caches. We definitely need to flush the > - * sampler cache so that we get updated contents from the render cache for > - * the glBlitFramebuffer() source. Also, we are sometimes warned in the > - * docs to flush the cache between reinterpretations of the same surface > - * data with different formats, which blorp does for stencil and depth > - * data. > - */ > - brw_emit_mi_flush(brw); > - > - brw_select_pipeline(brw, BRW_RENDER_PIPELINE); > - > -retry: > - intel_batchbuffer_require_space(brw, estimated_max_batch_usage, > RENDER_RING); > - intel_batchbuffer_save_state(brw); > - drm_intel_bo *saved_bo = brw->batch.bo; > - uint32_t saved_used = USED_BATCH(brw->batch); > - uint32_t saved_state_batch_offset = brw->batch.state_batch_offset; > - > switch (brw->gen) { > case 6: > gen6_blorp_exec(brw, params); > @@ -389,50 +367,6 @@ retry: > /* BLORP is not supported before Gen6. */ > unreachable("not reached"); > } > - > - /* Make sure we didn't wrap the batch unintentionally, and make sure we > - * reserved enough space that a wrap will never happen. 
> - */ > - assert(brw->batch.bo == saved_bo); > - assert((USED_BATCH(brw->batch) - saved_used) * 4 + > - (saved_state_batch_offset - brw->batch.state_batch_offset) < > - estimated_max_batch_usage); > - /* Shut up compiler warnings on release build */ > - (void)saved_bo; > - (void)saved_used; > - (void)saved_state_batch_offset; > - > - /* Check if the blorp op we just did would make our batch likely to fail > to > - * map all the BOs into the GPU at batch exec time later. If so, flush > the > - * batch and try again with nothing else in the batch. > - */ > - if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) { > - if (!check_aperture_failed_once) { > - check_aperture_failed_once = true; > - intel_batchbuffer_reset_to_saved(brw); > - intel_batchbuffer_flush(brw); > - goto retry; > - } else { > - int ret = intel_batchbuffer_flush(brw); > - WARN_ONCE(ret == -ENOSPC, > - "i965: blorp emit exceeded available aperture space\n"); > - } > - } > - > - if (unlikely(brw->always_flush_batch)) > - intel_batchbuffer_flush(brw); > - > - /* We've smashed all state compared to what the normal 3D pipeline > - * rendering tracks for GL. > - */ > - brw->ctx.NewDriverState |= BRW_NEW_BLORP; > - brw->no_depth_or_stencil = false; > - brw->ib.type = -1; > - > - /* Flush the sampler cache so any texturing from the destination is > - * coherent. > - */ > - brw_emit_mi_flush(brw); > } > > void > diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c > b/src/mesa/drivers/dri/i965/genX_blorp_exec.c > index e07fa0a..9ba1f8a 100644 > --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c > +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c > @@ -170,6 +170,28 @@ void > genX(blorp_exec)(struct brw_context *brw, > const struct brw_blorp_params *params) > { > + struct gl_context *ctx = &brw->ctx; > + const uint32_t estimated_max_batch_usage = GEN_GEN >= 8 ? 1800 : 1500; > + bool check_aperture_failed_once = false; > + > + /* Flush the sampler and render caches. 
We definitely need to flush the > + * sampler cache so that we get updated contents from the render cache for > + * the glBlitFramebuffer() source. Also, we are sometimes warned in the > + * docs to flush the cache between reinterpretations of the same surface > + * data with different formats, which blorp does for stencil and depth > + * data. > + */ > + brw_emit_mi_flush(brw); > + > + brw_select_pipeline(brw, BRW_RENDER_PIPELINE); > + > +retry: > + intel_batchbuffer_require_space(brw, estimated_max_batch_usage, > RENDER_RING); > + intel_batchbuffer_save_state(brw); > + drm_intel_bo *saved_bo = brw->batch.bo; > + uint32_t saved_used = USED_BATCH(brw->batch); > + uint32_t saved_state_batch_offset = brw->batch.state_batch_offset; > + > #if GEN_GEN == 6 > /* Emit workaround flushes when we switch from drawing to blorping. */ > brw_emit_post_sync_nonzero_flush(brw); > @@ -187,4 +209,48 @@ genX(blorp_exec)(struct brw_context *brw, > brw_emit_depth_stall_flushes(brw); > > blorp_exec(&brw->blorp, brw, params); > + > + /* Make sure we didn't wrap the batch unintentionally, and make sure we > + * reserved enough space that a wrap will never happen. > + */ > + assert(brw->batch.bo == saved_bo); > + assert((USED_BATCH(brw->batch) - saved_used) * 4 + > + (saved_state_batch_offset - brw->batch.state_batch_offset) < > + estimated_max_batch_usage); > + /* Shut up compiler warnings on release build */ > + (void)saved_bo; > + (void)saved_used; > + (void)saved_state_batch_offset; > + > + /* Check if the blorp op we just did would make our batch likely to fail > to > + * map all the BOs into the GPU at batch exec time later. If so, flush > the > + * batch and try again with nothing else in the batch. 
> + */ > + if (dri_bufmgr_check_aperture_space(&brw->batch.bo, 1)) { > + if (!check_aperture_failed_once) { > + check_aperture_failed_once = true; > + intel_batchbuffer_reset_to_saved(brw); > + intel_batchbuffer_flush(brw); > + goto retry; > + } else { > + int ret = intel_batchbuffer_flush(brw); > + WARN_ONCE(ret == -ENOSPC, > + "i965: blorp emit exceeded available aperture space\n"); > + } > + } > + > + if (unlikely(brw->always_flush_batch)) > + intel_batchbuffer_flush(brw); > + > + /* We've smashed all state compared to what the normal 3D pipeline > + * rendering tracks for GL. > + */ > + brw->ctx.NewDriverState |= BRW_NEW_BLORP; > + brw->no_depth_or_stencil = false; > + brw->ib.type = -1; > + > + /* Flush the sampler cache so any texturing from the destination is > + * coherent. > + */ > + brw_emit_mi_flush(brw); > } > diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.h > b/src/mesa/drivers/dri/i965/genX_blorp_exec.h > index 02a0397..f7fbf04 100644 > --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.h > +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.h > @@ -204,7 +204,7 @@ blorp_emit_input_varying_data(struct blorp_batch batch, > for (unsigned i = 0; i < max_num_varyings; i++) { > const gl_varying_slot attr = VARYING_SLOT_VAR0 + i; > > - if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr))) > + if (!(params->wm_prog_data->inputs_read & (1ull << attr)))
This looks like it belongs to the previous patch (the type of inputs_read is already changed there). > continue; > > memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes); > @@ -391,7 +391,7 @@ blorp_emit_sf_config(struct blorp_batch batch, > } > > blorp_emit(batch, GENX(3DSTATE_SBE), sbe) { > - sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; > + sbe.VertexURBEntryReadOffset = 1; Dropping BRW_SF_URB_ENTRY_READ_OFFSET here and below doesn't seem to be directly related to this patch either. Otherwise this patch is: Reviewed-by: Topi Pohjolainen <topi.pohjolai...@intel.com> > sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs; > sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); > sbe.ForceVertexURBEntryReadLength = true; > @@ -419,7 +419,7 @@ blorp_emit_sf_config(struct blorp_batch batch, > } > > blorp_emit(batch, GENX(3DSTATE_SBE), sbe) { > - sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; > + sbe.VertexURBEntryReadOffset = 1; > if (prog_data) { > sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs; > sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); > @@ -439,7 +439,7 @@ blorp_emit_sf_config(struct blorp_batch batch, > sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ? > MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL; > > - sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; > + sf.VertexURBEntryReadOffset = 1; > if (prog_data) { > sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs; > sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); > -- > 2.5.0.400.gff86faf > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev