Module: Mesa Branch: master Commit: 740350c982bd2735b9eb9063c2b91856b6f1ad31 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=740350c982bd2735b9eb9063c2b91856b6f1ad31
Author: Eric Anholt <[email protected]> Date: Thu Mar 14 14:41:37 2013 -0700 i965: Make the fragment shader pull constants index by dwords, not vec4s. We want to load vec4s, since loading a vec4 instead of a dword adds basically no latency. But for variable indexed access, the previous requirement of aligned vec4s for a sampler LD was hard to implement. Note that this change only affects those messages that use the surface format, like sampler LDs; it does not affect the untyped data cache loads we've used in other cases. No significant performance difference on my GLSL demo with uniforms forced to take the varying pull constants path (n=4). NOTE: This is a candidate for the 9.1 branch. Reviewed-by: Kenneth Graunke <[email protected]> --- src/mesa/drivers/dri/i965/brw_fs.cpp | 5 ++++- src/mesa/drivers/dri/i965/brw_state.h | 5 ----- src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 2 +- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 13 ++++++++----- src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 5 +++-- src/mesa/drivers/dri/intel/intel_context.h | 5 +++-- 6 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7c9ac66..da3ac15 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2478,10 +2478,13 @@ fs_visitor::lower_uniform_pull_constant_loads() continue; if (intel->gen >= 7) { + /* The offset arg before was a vec4-aligned byte offset. We need to + * turn it into a dword offset. 
+ */ fs_reg const_offset_reg = inst->src[1]; assert(const_offset_reg.file == IMM && const_offset_reg.type == BRW_REGISTER_TYPE_UD); - const_offset_reg.imm.u /= 16; + const_offset_reg.imm.u /= 4; fs_reg payload = fs_reg(this, glsl_type::uint_type); /* This is actually going to be a MOV, but since only the first dword diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 1f5e18a..0914cdd 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -187,11 +187,6 @@ void *brw_state_batch(struct brw_context *brw, void gen4_init_vtable_surface_functions(struct brw_context *brw); uint32_t brw_get_surface_tiling_bits(uint32_t tiling); uint32_t brw_get_surface_num_multisamples(unsigned num_samples); -void brw_create_constant_surface(struct brw_context *brw, - drm_intel_bo *bo, - uint32_t offset, - int width, - uint32_t *out_offset); uint32_t brw_format_for_mesa_format(gl_format mesa_format); diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index 6c0b690..675a84c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -91,7 +91,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw) const int surf = SURF_INDEX_VERT_CONST_BUFFER; intel->vtbl.create_constant_surface(brw, brw->vs.const_bo, 0, size, - &brw->vs.surf_offset[surf]); + &brw->vs.surf_offset[surf], false); brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index e458da7..a74b2c7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -913,15 +913,16 @@ brw_update_texture_surface(struct gl_context *ctx, * Create the constant buffer surface. 
Vertex/fragment shader constants will be * read from this buffer with Data Port Read instructions/messages. */ -void +static void brw_create_constant_surface(struct brw_context *brw, drm_intel_bo *bo, uint32_t offset, uint32_t size, - uint32_t *out_offset) + uint32_t *out_offset, + bool dword_pitch) { struct intel_context *intel = &brw->intel; - uint32_t stride = 16; + uint32_t stride = dword_pitch ? 4 : 16; uint32_t elements = ALIGN(size, stride) / stride; const GLint w = elements - 1; uint32_t *surf; @@ -1090,7 +1091,8 @@ brw_upload_wm_pull_constants(struct brw_context *brw) drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo); intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size, - &brw->wm.surf_offset[surf_index]); + &brw->wm.surf_offset[surf_index], + true); brw->state.dirty.brw |= BRW_NEW_SURFACES; } @@ -1443,7 +1445,8 @@ brw_upload_ubo_surfaces(struct brw_context *brw, */ intel->vtbl.create_constant_surface(brw, bo, binding->Offset, bo->size - binding->Offset, - &surf_offsets[i]); + &surf_offsets[i], + shader->Type == GL_FRAGMENT_SHADER); } if (shader->NumUniformBlocks) diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 484afcd..2c12be3 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -384,10 +384,11 @@ gen7_create_constant_surface(struct brw_context *brw, drm_intel_bo *bo, uint32_t offset, uint32_t size, - uint32_t *out_offset) + uint32_t *out_offset, + bool dword_pitch) { struct intel_context *intel = &brw->intel; - uint32_t stride = 16; + uint32_t stride = dword_pitch ? 
4 : 16; uint32_t elements = ALIGN(size, stride) / stride; const GLint w = elements - 1; diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 958db1c..b130c02 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -203,13 +203,14 @@ struct intel_context drm_intel_bo *bo, uint32_t offset, uint32_t size, - uint32_t *out_offset); + uint32_t *out_offset, + bool dword_pitch); /** \} */ } vtbl; GLbitfield Fallback; /**< mask of INTEL_FALLBACK_x bits */ GLuint NewGLState; - + dri_bufmgr *bufmgr; unsigned int maxBatchSize; _______________________________________________ mesa-commit mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-commit
