On Mon, Jun 19, 2017 at 11:17 AM, Rafael Antognolli <[email protected]> wrote:
> On Mon, Jun 19, 2017 at 09:46:30AM -0700, Kristian Høgsberg wrote:
>> On Fri, Jun 16, 2017 at 4:31 PM, Rafael Antognolli
>> <[email protected]> wrote:
>> > The code doesn't get exactly a lot simpler but at least it is in a single
>> > place, and we delete more than we add.
>>
>> Another good point is that you get rid of struct brw_wm_unit_state
>> which was a third mechanism for encoding GEN state. We used to have
>> GENXML, manual packing and these bitfield structs. Now we're down to
>> just GENXML and some manual packing.
>
> Nice, I think I can add this to the commit message if you don't mind :)
Please do, that's why I brought it up ;-) >> Kristian >> >> > >> > Signed-off-by: Rafael Antognolli <[email protected]> >> > --- >> > src/mesa/drivers/dri/i965/Makefile.sources | 1 - >> > src/mesa/drivers/dri/i965/brw_state.h | 1 - >> > src/mesa/drivers/dri/i965/brw_structs.h | 121 ------------ >> > src/mesa/drivers/dri/i965/brw_wm.h | 2 - >> > src/mesa/drivers/dri/i965/brw_wm_state.c | 274 >> > -------------------------- >> > src/mesa/drivers/dri/i965/genX_state_upload.c | 191 ++++++++++++++---- >> > 6 files changed, 153 insertions(+), 437 deletions(-) >> > delete mode 100644 src/mesa/drivers/dri/i965/brw_wm_state.c >> > >> > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources >> > b/src/mesa/drivers/dri/i965/Makefile.sources >> > index 89be92e..c15b3ef 100644 >> > --- a/src/mesa/drivers/dri/i965/Makefile.sources >> > +++ b/src/mesa/drivers/dri/i965/Makefile.sources >> > @@ -61,7 +61,6 @@ i965_FILES = \ >> > brw_vs_surface_state.c \ >> > brw_wm.c \ >> > brw_wm.h \ >> > - brw_wm_state.c \ >> > brw_wm_surface_state.c \ >> > gen4_blorp_exec.h \ >> > gen6_clip_state.c \ >> > diff --git a/src/mesa/drivers/dri/i965/brw_state.h >> > b/src/mesa/drivers/dri/i965/brw_state.h >> > index 8f3bd7f..9588a51 100644 >> > --- a/src/mesa/drivers/dri/i965/brw_state.h >> > +++ b/src/mesa/drivers/dri/i965/brw_state.h >> > @@ -89,7 +89,6 @@ extern const struct brw_tracked_state >> > brw_wm_image_surfaces; >> > extern const struct brw_tracked_state brw_cs_ubo_surfaces; >> > extern const struct brw_tracked_state brw_cs_abo_surfaces; >> > extern const struct brw_tracked_state brw_cs_image_surfaces; >> > -extern const struct brw_tracked_state brw_wm_unit; >> > >> > extern const struct brw_tracked_state brw_psp_urb_cbs; >> > >> > diff --git a/src/mesa/drivers/dri/i965/brw_structs.h >> > b/src/mesa/drivers/dri/i965/brw_structs.h >> > index 5a0d91d..fb592be 100644 >> > --- a/src/mesa/drivers/dri/i965/brw_structs.h >> > +++ b/src/mesa/drivers/dri/i965/brw_structs.h >> > @@ -65,127 +65,6 @@ 
struct brw_urb_fence >> > } bits1; >> > }; >> > >> > -/* State structs for the various fixed function units: >> > - */ >> > - >> > - >> > -struct thread0 >> > -{ >> > - unsigned pad0:1; >> > - unsigned grf_reg_count:3; >> > - unsigned pad1:2; >> > - unsigned kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */ >> > -}; >> > - >> > -struct thread1 >> > -{ >> > - unsigned ext_halt_exception_enable:1; >> > - unsigned sw_exception_enable:1; >> > - unsigned mask_stack_exception_enable:1; >> > - unsigned timeout_exception_enable:1; >> > - unsigned illegal_op_exception_enable:1; >> > - unsigned pad0:3; >> > - unsigned depth_coef_urb_read_offset:6; /* WM only */ >> > - unsigned pad1:2; >> > - unsigned floating_point_mode:1; >> > - unsigned thread_priority:1; >> > - unsigned binding_table_entry_count:8; >> > - unsigned pad3:5; >> > - unsigned single_program_flow:1; >> > -}; >> > - >> > -struct thread2 >> > -{ >> > - unsigned per_thread_scratch_space:4; >> > - unsigned pad0:6; >> > - unsigned scratch_space_base_pointer:22; >> > -}; >> > - >> > - >> > -struct thread3 >> > -{ >> > - unsigned dispatch_grf_start_reg:4; >> > - unsigned urb_entry_read_offset:6; >> > - unsigned pad0:1; >> > - unsigned urb_entry_read_length:6; >> > - unsigned pad1:1; >> > - unsigned const_urb_entry_read_offset:6; >> > - unsigned pad2:1; >> > - unsigned const_urb_entry_read_length:6; >> > - unsigned pad3:1; >> > -}; >> > - >> > -struct brw_wm_unit_state >> > -{ >> > - struct thread0 thread0; >> > - struct thread1 thread1; >> > - struct thread2 thread2; >> > - struct thread3 thread3; >> > - >> > - struct { >> > - unsigned stats_enable:1; >> > - unsigned depth_buffer_clear:1; >> > - unsigned sampler_count:3; >> > - unsigned sampler_state_pointer:27; >> > - } wm4; >> > - >> > - struct >> > - { >> > - unsigned enable_8_pix:1; >> > - unsigned enable_16_pix:1; >> > - unsigned enable_32_pix:1; >> > - unsigned enable_con_32_pix:1; >> > - unsigned enable_con_64_pix:1; >> > - unsigned pad0:1; >> > - 
>> > - /* These next four bits are for Ironlake+ */ >> > - unsigned fast_span_coverage_enable:1; >> > - unsigned depth_buffer_clear:1; >> > - unsigned depth_buffer_resolve_enable:1; >> > - unsigned hierarchical_depth_buffer_resolve_enable:1; >> > - >> > - unsigned legacy_global_depth_bias:1; >> > - unsigned line_stipple:1; >> > - unsigned depth_offset:1; >> > - unsigned polygon_stipple:1; >> > - unsigned line_aa_region_width:2; >> > - unsigned line_endcap_aa_region_width:2; >> > - unsigned early_depth_test:1; >> > - unsigned thread_dispatch_enable:1; >> > - unsigned program_uses_depth:1; >> > - unsigned program_computes_depth:1; >> > - unsigned program_uses_killpixel:1; >> > - unsigned legacy_line_rast: 1; >> > - unsigned transposed_urb_read_enable:1; >> > - unsigned max_threads:7; >> > - } wm5; >> > - >> > - float global_depth_offset_constant; >> > - float global_depth_offset_scale; >> > - >> > - /* for Ironlake only */ >> > - struct { >> > - unsigned pad0:1; >> > - unsigned grf_reg_count_1:3; >> > - unsigned pad1:2; >> > - unsigned kernel_start_pointer_1:26; >> > - } wm8; >> > - >> > - struct { >> > - unsigned pad0:1; >> > - unsigned grf_reg_count_2:3; >> > - unsigned pad1:2; >> > - unsigned kernel_start_pointer_2:26; >> > - } wm9; >> > - >> > - struct { >> > - unsigned pad0:1; >> > - unsigned grf_reg_count_3:3; >> > - unsigned pad1:2; >> > - unsigned kernel_start_pointer_3:26; >> > - } wm10; >> > -}; >> > - >> > struct gen5_sampler_default_color { >> > uint8_t ub[4]; >> > float f[4]; >> > diff --git a/src/mesa/drivers/dri/i965/brw_wm.h >> > b/src/mesa/drivers/dri/i965/brw_wm.h >> > index 613172a..113cdf3 100644 >> > --- a/src/mesa/drivers/dri/i965/brw_wm.h >> > +++ b/src/mesa/drivers/dri/i965/brw_wm.h >> > @@ -41,8 +41,6 @@ >> > extern "C" { >> > #endif >> > >> > -bool brw_color_buffer_write_enabled(struct brw_context *brw); >> > - >> > void >> > brw_upload_wm_prog(struct brw_context *brw); >> > >> > diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c >> > 
b/src/mesa/drivers/dri/i965/brw_wm_state.c >> > deleted file mode 100644 >> > index 69bbeb2..0000000 >> > --- a/src/mesa/drivers/dri/i965/brw_wm_state.c >> > +++ /dev/null >> > @@ -1,274 +0,0 @@ >> > -/* >> > - Copyright (C) Intel Corp. 2006. All Rights Reserved. >> > - Intel funded Tungsten Graphics to >> > - develop this 3D driver. >> > - >> > - Permission is hereby granted, free of charge, to any person obtaining >> > - a copy of this software and associated documentation files (the >> > - "Software"), to deal in the Software without restriction, including >> > - without limitation the rights to use, copy, modify, merge, publish, >> > - distribute, sublicense, and/or sell copies of the Software, and to >> > - permit persons to whom the Software is furnished to do so, subject to >> > - the following conditions: >> > - >> > - The above copyright notice and this permission notice (including the >> > - next paragraph) shall be included in all copies or substantial >> > - portions of the Software. >> > - >> > - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >> > - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >> > - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. >> > - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE >> > - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION >> > - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION >> > - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
>> > - >> > - **********************************************************************/ >> > - /* >> > - * Authors: >> > - * Keith Whitwell <[email protected]> >> > - */ >> > - >> > - >> > - >> > -#include "intel_batchbuffer.h" >> > -#include "intel_fbo.h" >> > -#include "brw_context.h" >> > -#include "brw_state.h" >> > -#include "brw_defines.h" >> > -#include "brw_wm.h" >> > -#include "compiler/nir/nir.h" >> > - >> > -/*********************************************************************** >> > - * WM unit - fragment programs and rasterization >> > - */ >> > - >> > -bool >> > -brw_color_buffer_write_enabled(struct brw_context *brw) >> > -{ >> > - struct gl_context *ctx = &brw->ctx; >> > - /* BRW_NEW_FRAGMENT_PROGRAM */ >> > - const struct gl_program *fp = brw->fragment_program; >> > - unsigned i; >> > - >> > - /* _NEW_BUFFERS */ >> > - for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { >> > - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; >> > - uint64_t outputs_written = fp->info.outputs_written; >> > - >> > - /* _NEW_COLOR */ >> > - if (rb && (outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR) || >> > - outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) >> > && >> > - (ctx->Color.ColorMask[i][0] || >> > - ctx->Color.ColorMask[i][1] || >> > - ctx->Color.ColorMask[i][2] || >> > - ctx->Color.ColorMask[i][3])) { >> > - return true; >> > - } >> > - } >> > - >> > - return false; >> > -} >> > - >> > -/** >> > - * Setup wm hardware state. 
See page 225 of Volume 2 >> > - */ >> > -static void >> > -brw_upload_wm_unit(struct brw_context *brw) >> > -{ >> > - const struct gen_device_info *devinfo = &brw->screen->devinfo; >> > - struct gl_context *ctx = &brw->ctx; >> > - /* BRW_NEW_FRAGMENT_PROGRAM */ >> > - const struct gl_program *fp = brw->fragment_program; >> > - /* BRW_NEW_FS_PROG_DATA */ >> > - const struct brw_wm_prog_data *prog_data = >> > - brw_wm_prog_data(brw->wm.base.prog_data); >> > - struct brw_wm_unit_state *wm; >> > - >> > - wm = brw_state_batch(brw, sizeof(*wm), 32, &brw->wm.base.state_offset); >> > - memset(wm, 0, sizeof(*wm)); >> > - >> > - if (prog_data->dispatch_8 && prog_data->dispatch_16) { >> > - /* These two fields should be the same pre-gen6, which is why we >> > - * only have one hardware field to program for both dispatch >> > - * widths. >> > - */ >> > - assert(prog_data->base.dispatch_grf_start_reg == >> > - prog_data->dispatch_grf_start_reg_2); >> > - } >> > - >> > - /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_FS_PROG_DATA */ >> > - wm->wm5.enable_8_pix = prog_data->dispatch_8; >> > - wm->wm5.enable_16_pix = prog_data->dispatch_16; >> > - >> > - if (prog_data->dispatch_8 || prog_data->dispatch_16) { >> > - wm->thread0.grf_reg_count = prog_data->reg_blocks_0; >> > - wm->thread0.kernel_start_pointer = >> > - brw_program_reloc(brw, >> > - brw->wm.base.state_offset + >> > - offsetof(struct brw_wm_unit_state, thread0), >> > - brw->wm.base.prog_offset + >> > - (wm->thread0.grf_reg_count << 1)) >> 6; >> > - } >> > - >> > - if (prog_data->prog_offset_2) { >> > - wm->wm9.grf_reg_count_2 = prog_data->reg_blocks_2; >> > - wm->wm9.kernel_start_pointer_2 = >> > - brw_program_reloc(brw, >> > - brw->wm.base.state_offset + >> > - offsetof(struct brw_wm_unit_state, wm9), >> > - brw->wm.base.prog_offset + >> > - prog_data->prog_offset_2 + >> > - (wm->wm9.grf_reg_count_2 << 1)) >> 6; >> > - } >> > - >> > - wm->thread1.depth_coef_urb_read_offset = 1; >> > - if (prog_data->base.use_alt_mode) >> > - 
wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; >> > - else >> > - wm->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754; >> > - >> > - wm->thread1.binding_table_entry_count = >> > - prog_data->base.binding_table.size_bytes / 4; >> > - >> > - if (prog_data->base.total_scratch != 0) { >> > - wm->thread2.scratch_space_base_pointer = >> > - brw->wm.base.scratch_bo->offset64 >> 10; /* reloc */ >> > - wm->thread2.per_thread_scratch_space = >> > - ffs(brw->wm.base.per_thread_scratch) - 11; >> > - } else { >> > - wm->thread2.scratch_space_base_pointer = 0; >> > - wm->thread2.per_thread_scratch_space = 0; >> > - } >> > - >> > - wm->thread3.dispatch_grf_start_reg = >> > - prog_data->base.dispatch_grf_start_reg; >> > - wm->thread3.urb_entry_read_length = >> > - prog_data->num_varying_inputs * 2; >> > - wm->thread3.urb_entry_read_offset = 0; >> > - wm->thread3.const_urb_entry_read_length = >> > - prog_data->base.curb_read_length; >> > - /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */ >> > - wm->thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; >> > - >> > - if (brw->gen == 5) >> > - wm->wm4.sampler_count = 0; /* hardware requirement */ >> > - else { >> > - wm->wm4.sampler_count = (brw->wm.base.sampler_count + 1) / 4; >> > - } >> > - >> > - if (brw->wm.base.sampler_count) { >> > - /* BRW_NEW_SAMPLER_STATE_TABLE - reloc */ >> > - wm->wm4.sampler_state_pointer = (brw->batch.bo->offset64 + >> > - brw->wm.base.sampler_offset) >> 5; >> > - } else { >> > - wm->wm4.sampler_state_pointer = 0; >> > - } >> > - >> > - /* BRW_NEW_FRAGMENT_PROGRAM */ >> > - wm->wm5.program_uses_depth = prog_data->uses_src_depth; >> > - wm->wm5.program_computes_depth = (fp->info.outputs_written & >> > - BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != >> > 0; >> > - /* _NEW_BUFFERS >> > - * Override for NULL depthbuffer case, required by the Pixel Shader >> > Computed >> > - * Depth field. 
>> > - */ >> > - if (!intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH)) >> > - wm->wm5.program_computes_depth = 0; >> > - >> > - /* _NEW_COLOR */ >> > - wm->wm5.program_uses_killpixel = >> > - prog_data->uses_kill || ctx->Color.AlphaEnabled; >> > - >> > - wm->wm5.max_threads = devinfo->max_wm_threads - 1; >> > - >> > - /* _NEW_BUFFERS | _NEW_COLOR */ >> > - if (brw_color_buffer_write_enabled(brw) || >> > - wm->wm5.program_uses_killpixel || >> > - wm->wm5.program_computes_depth) { >> > - wm->wm5.thread_dispatch_enable = 1; >> > - } >> > - >> > - wm->wm5.legacy_line_rast = 0; >> > - wm->wm5.legacy_global_depth_bias = 0; >> > - wm->wm5.early_depth_test = 1; /* never need to disable */ >> > - wm->wm5.line_aa_region_width = 0; >> > - wm->wm5.line_endcap_aa_region_width = 1; >> > - >> > - /* _NEW_POLYGONSTIPPLE */ >> > - wm->wm5.polygon_stipple = ctx->Polygon.StippleFlag; >> > - >> > - /* _NEW_POLYGON */ >> > - if (ctx->Polygon.OffsetFill) { >> > - wm->wm5.depth_offset = 1; >> > - /* Something weird going on with legacy_global_depth_bias, >> > - * offset_constant, scaling and MRD. This value passes glean >> > - * but gives some odd results elsewere (eg. the >> > - * quad-offset-units test). 
>> > - */ >> > - wm->global_depth_offset_constant = ctx->Polygon.OffsetUnits * 2; >> > - >> > - /* This is the only value that passes glean: >> > - */ >> > - wm->global_depth_offset_scale = ctx->Polygon.OffsetFactor; >> > - } >> > - >> > - /* _NEW_LINE */ >> > - wm->wm5.line_stipple = ctx->Line.StippleFlag; >> > - >> > - /* BRW_NEW_STATS_WM */ >> > - if (brw->stats_wm) >> > - wm->wm4.stats_enable = 1; >> > - >> > - /* Emit scratch space relocation */ >> > - if (prog_data->base.total_scratch != 0) { >> > - brw_emit_reloc(&brw->batch, >> > - brw->wm.base.state_offset + >> > - offsetof(struct brw_wm_unit_state, thread2), >> > - brw->wm.base.scratch_bo, >> > - wm->thread2.per_thread_scratch_space, >> > - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); >> > - } >> > - >> > - /* Emit sampler state relocation */ >> > - if (brw->wm.base.sampler_count != 0) { >> > - brw_emit_reloc(&brw->batch, >> > - brw->wm.base.state_offset + >> > - offsetof(struct brw_wm_unit_state, wm4), >> > - brw->batch.bo, >> > - brw->wm.base.sampler_offset | wm->wm4.stats_enable | >> > - (wm->wm4.sampler_count << 2), >> > - I915_GEM_DOMAIN_INSTRUCTION, 0); >> > - } >> > - >> > - brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; >> > - >> > - /* _NEW_POLGYON */ >> > - if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) { >> > - BEGIN_BATCH(2); >> > - OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2)); >> > - OUT_BATCH_F(ctx->Polygon.OffsetClamp); >> > - ADVANCE_BATCH(); >> > - >> > - brw->wm.offset_clamp = ctx->Polygon.OffsetClamp; >> > - } >> > -} >> > - >> > -const struct brw_tracked_state brw_wm_unit = { >> > - .dirty = { >> > - .mesa = _NEW_BUFFERS | >> > - _NEW_COLOR | >> > - _NEW_LINE | >> > - _NEW_POLYGON | >> > - _NEW_POLYGONSTIPPLE, >> > - .brw = BRW_NEW_BATCH | >> > - BRW_NEW_BLORP | >> > - BRW_NEW_PUSH_CONSTANT_ALLOCATION | >> > - BRW_NEW_FRAGMENT_PROGRAM | >> > - BRW_NEW_FS_PROG_DATA | >> > - BRW_NEW_PROGRAM_CACHE | >> > - BRW_NEW_SAMPLER_STATE_TABLE | >> > - 
BRW_NEW_STATS_WM, >> > - }, >> > - .emit = brw_upload_wm_unit, >> > -}; >> > diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c >> > b/src/mesa/drivers/dri/i965/genX_state_upload.c >> > index 4ff5394..bc64c5d 100644 >> > --- a/src/mesa/drivers/dri/i965/genX_state_upload.c >> > +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c >> > @@ -1713,7 +1713,33 @@ static const struct brw_tracked_state >> > genX(sf_state) = { >> > >> > /* ---------------------------------------------------------------------- >> > */ >> > >> > -#if GEN_GEN >= 6 >> > +static bool >> > +brw_color_buffer_write_enabled(struct brw_context *brw) >> > +{ >> > + struct gl_context *ctx = &brw->ctx; >> > + /* BRW_NEW_FRAGMENT_PROGRAM */ >> > + const struct gl_program *fp = brw->fragment_program; >> > + unsigned i; >> > + >> > + /* _NEW_BUFFERS */ >> > + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { >> > + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; >> > + uint64_t outputs_written = fp->info.outputs_written; >> > + >> > + /* _NEW_COLOR */ >> > + if (rb && (outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR) || >> > + outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) >> > && >> > + (ctx->Color.ColorMask[i][0] || >> > + ctx->Color.ColorMask[i][1] || >> > + ctx->Color.ColorMask[i][2] || >> > + ctx->Color.ColorMask[i][3])) { >> > + return true; >> > + } >> > + } >> > + >> > + return false; >> > +} >> > + >> > static void >> > genX(upload_wm)(struct brw_context *brw) >> > { >> > @@ -1725,11 +1751,10 @@ genX(upload_wm)(struct brw_context *brw) >> > >> > UNUSED bool writes_depth = >> > wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF; >> > + UNUSED struct brw_stage_state *stage_state = &brw->wm.base; >> > + UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo; >> > >> > -#if GEN_GEN < 7 >> > - const struct brw_stage_state *stage_state = &brw->wm.base; >> > - const struct gen_device_info *devinfo = &brw->screen->devinfo; >> > - >> > 
+#if GEN_GEN == 6 >> > /* We can't fold this into gen6_upload_wm_push_constants(), because >> > * according to the SNB PRM, vol 2 part 1 section 7.2.2 >> > * (3DSTATE_CONSTANT_PS [DevSNB]): >> > @@ -1748,27 +1773,94 @@ genX(upload_wm)(struct brw_context *brw) >> > } >> > #endif >> > >> > +#if GEN_GEN >= 6 >> > brw_batch_emit(brw, GENX(3DSTATE_WM), wm) { >> > - wm.StatisticsEnable = true; >> > wm.LineAntialiasingRegionWidth = _10pixels; >> > wm.LineEndCapAntialiasingRegionWidth = _05pixels; >> > >> > + wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT; >> > + wm.BarycentricInterpolationMode = >> > wm_prog_data->barycentric_interp_modes; >> > +#else >> > + ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; >> > + brw_state_emit(brw, GENX(WM_STATE), 64, &stage_state->state_offset, >> > wm) { >> > + if (wm_prog_data->dispatch_8 && wm_prog_data->dispatch_16) { >> > + /* These two fields should be the same pre-gen6, which is why we >> > + * only have one hardware field to program for both dispatch >> > + * widths. 
>> > + */ >> > + assert(wm_prog_data->base.dispatch_grf_start_reg == >> > + wm_prog_data->dispatch_grf_start_reg_2); >> > + } >> > + >> > + if (wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16) >> > + wm.GRFRegisterCount0 = wm_prog_data->reg_blocks_0; >> > + >> > + if (stage_state->sampler_count) >> > + wm.SamplerStatePointer = >> > + instruction_ro_bo(brw->batch.bo, stage_state->sampler_offset); >> > +#if GEN_GEN == 5 >> > + if (wm_prog_data->prog_offset_2) >> > + wm.GRFRegisterCount2 = wm_prog_data->reg_blocks_2; >> > +#endif >> > + >> > + wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2; >> > + wm.ConstantURBEntryReadLength = wm_prog_data->base.curb_read_length; >> > + /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */ >> > + wm.ConstantURBEntryReadOffset = brw->curbe.wm_start * 2; >> > + wm.EarlyDepthTestEnable = true; >> > + wm.LineAntialiasingRegionWidth = _05pixels; >> > + wm.LineEndCapAntialiasingRegionWidth = _10pixels; >> > + >> > + /* _NEW_POLYGON */ >> > + if (ctx->Polygon.OffsetFill) { >> > + wm.GlobalDepthOffsetEnable = true; >> > + /* Something weird going on with legacy_global_depth_bias, >> > + * offset_constant, scaling and MRD. This value passes glean >> > + * but gives some odd results elsewere (eg. the >> > + * quad-offset-units test). >> > + */ >> > + wm.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2; >> > + >> > + /* This is the only value that passes glean: >> > + */ >> > + wm.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor; >> > + } >> > + >> > + wm.DepthCoefficientURBReadOffset = 1; >> > +#endif >> > + >> > + /* BRW_NEW_STATS_WM */ >> > + wm.StatisticsEnable = GEN_GEN >= 6 || brw->stats_wm; >> > + >> > #if GEN_GEN < 7 >> > if (wm_prog_data->base.use_alt_mode) >> > - wm.FloatingPointMode = Alternate; >> > + wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate; >> > + >> > + wm.SamplerCount = GEN_GEN == 5 ? 
>> > + 0 : DIV_ROUND_UP(stage_state->sampler_count, 4); >> > >> > - wm.SamplerCount = DIV_ROUND_UP(stage_state->sampler_count, 4); >> > - wm.BindingTableEntryCount = >> > wm_prog_data->base.binding_table.size_bytes / 4; >> > + wm.BindingTableEntryCount = >> > + wm_prog_data->base.binding_table.size_bytes / 4; >> > wm.MaximumNumberofThreads = devinfo->max_wm_threads - 1; >> > wm._8PixelDispatchEnable = wm_prog_data->dispatch_8; >> > wm._16PixelDispatchEnable = wm_prog_data->dispatch_16; >> > wm.DispatchGRFStartRegisterForConstantSetupData0 = >> > wm_prog_data->base.dispatch_grf_start_reg; >> > - wm.DispatchGRFStartRegisterForConstantSetupData2 = >> > - wm_prog_data->dispatch_grf_start_reg_2; >> > - wm.KernelStartPointer0 = stage_state->prog_offset; >> > - wm.KernelStartPointer2 = stage_state->prog_offset + >> > - wm_prog_data->prog_offset_2; >> > + if (GEN_GEN == 6 || >> > + wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16) { >> > + wm.KernelStartPointer0 = KSP_ro(brw, >> > + stage_state->prog_offset); >> > + } >> > + >> > +#if GEN_GEN >= 5 >> > + if (GEN_GEN == 6 || wm_prog_data->prog_offset_2) { >> > + wm.KernelStartPointer2 = >> > + KSP_ro(brw, stage_state->prog_offset + >> > + wm_prog_data->prog_offset_2); >> > + } >> > +#endif >> > + >> > +#if GEN_GEN == 6 >> > wm.DualSourceBlendEnable = >> > wm_prog_data->dual_src_blend && (ctx->Color.BlendEnabled & 1) && >> > ctx->Color.Blend[0]._UsesDualSrc; >> > @@ -1792,42 +1884,34 @@ genX(upload_wm)(struct brw_context *brw) >> > else >> > wm.PositionXYOffsetSelect = POSOFFSET_NONE; >> > >> > + wm.DispatchGRFStartRegisterForConstantSetupData2 = >> > + wm_prog_data->dispatch_grf_start_reg_2; >> > +#endif >> > + >> > if (wm_prog_data->base.total_scratch) { >> > wm.ScratchSpaceBasePointer = >> > - render_bo(stage_state->scratch_bo, >> > - ffs(stage_state->per_thread_scratch) - 11); >> > + render_bo(stage_state->scratch_bo, 0); >> > + wm.PerThreadScratchSpace = >> > + ffs(stage_state->per_thread_scratch) - 11; >> > } >> > 
>> > wm.PixelShaderComputedDepth = writes_depth; >> > #endif >> > >> > - wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT; >> > - >> > /* _NEW_LINE */ >> > wm.LineStippleEnable = ctx->Line.StippleFlag; >> > >> > /* _NEW_POLYGON */ >> > wm.PolygonStippleEnable = ctx->Polygon.StippleFlag; >> > - wm.BarycentricInterpolationMode = >> > wm_prog_data->barycentric_interp_modes; >> > >> > #if GEN_GEN < 8 >> > - /* _NEW_BUFFERS */ >> > - const bool multisampled_fbo = >> > _mesa_geometric_samples(ctx->DrawBuffer) > 1; >> > >> > - wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; >> > +#if GEN_GEN >= 6 >> > wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; >> > - if (wm_prog_data->uses_kill || >> > - _mesa_is_alpha_test_enabled(ctx) || >> > - _mesa_is_alpha_to_coverage_enabled(ctx) || >> > - wm_prog_data->uses_omask) { >> > - wm.PixelShaderKillsPixel = true; >> > - } >> > >> > - /* _NEW_BUFFERS | _NEW_COLOR */ >> > - if (brw_color_buffer_write_enabled(brw) || writes_depth || >> > - wm_prog_data->has_side_effects || wm.PixelShaderKillsPixel) { >> > - wm.ThreadDispatchEnable = true; >> > - } >> > + /* _NEW_BUFFERS */ >> > + const bool multisampled_fbo = >> > _mesa_geometric_samples(ctx->DrawBuffer) > 1; >> > + >> > if (multisampled_fbo) { >> > /* _NEW_MULTISAMPLE */ >> > if (ctx->Multisample.Enabled) >> > @@ -1843,6 +1927,21 @@ genX(upload_wm)(struct brw_context *brw) >> > wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL; >> > wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE; >> > } >> > +#endif >> > + wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; >> > + if (wm_prog_data->uses_kill || >> > + _mesa_is_alpha_test_enabled(ctx) || >> > + _mesa_is_alpha_to_coverage_enabled(ctx) || >> > + (GEN_GEN >= 6 && wm_prog_data->uses_omask)) { >> > + wm.PixelShaderKillsPixel = true; >> > + } >> > + >> > + /* _NEW_BUFFERS | _NEW_COLOR */ >> > + if (brw_color_buffer_write_enabled(brw) || writes_depth || >> > + wm.PixelShaderKillsPixel || >> > + (GEN_GEN >= 
6 && wm_prog_data->has_side_effects)) { >> > + wm.ThreadDispatchEnable = true; >> > + } >> > >> > #if GEN_GEN >= 7 >> > wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; >> > @@ -1873,6 +1972,16 @@ genX(upload_wm)(struct brw_context *brw) >> > wm.EarlyDepthStencilControl = EDSC_PSEXEC; >> > #endif >> > } >> > + >> > +#if GEN_GEN <= 5 >> > + if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) { >> > + brw_batch_emit(brw, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp) >> > { >> > + clamp.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp; >> > + } >> > + >> > + brw->wm.offset_clamp = ctx->Polygon.OffsetClamp; >> > + } >> > +#endif >> > } >> > >> > static const struct brw_tracked_state genX(wm_state) = { >> > @@ -1880,17 +1989,23 @@ static const struct brw_tracked_state >> > genX(wm_state) = { >> > .mesa = _NEW_LINE | >> > _NEW_POLYGON | >> > (GEN_GEN < 8 ? _NEW_BUFFERS | >> > - _NEW_COLOR | >> > - _NEW_MULTISAMPLE : >> > + _NEW_COLOR : >> > 0) | >> > - (GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0), >> > + (GEN_GEN == 6 ? _NEW_PROGRAM_CONSTANTS : 0) | >> > + (GEN_GEN < 6 ? _NEW_POLYGONSTIPPLE : 0) | >> > + (GEN_GEN < 8 && GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0), >> > .brw = BRW_NEW_BLORP | >> > BRW_NEW_FS_PROG_DATA | >> > + (GEN_GEN < 6 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION | >> > + BRW_NEW_FRAGMENT_PROGRAM | >> > + BRW_NEW_PROGRAM_CACHE | >> > + BRW_NEW_SAMPLER_STATE_TABLE | >> > + BRW_NEW_STATS_WM >> > + : 0) | >> > (GEN_GEN < 7 ? 
BRW_NEW_BATCH : BRW_NEW_CONTEXT), >> > }, >> > .emit = genX(upload_wm), >> > }; >> > -#endif >> > >> > /* ---------------------------------------------------------------------- >> > */ >> > >> > @@ -4475,7 +4590,7 @@ genX(init_atoms)(struct brw_context *brw) >> > &brw_vs_samplers, >> > >> > /* These set up state for brw_psp_urb_cbs */ >> > - &brw_wm_unit, >> > + &genX(wm_state), >> > &genX(sf_clip_viewport), >> > &genX(sf_state), >> > &genX(vs_state), /* always required, enabled or not */ >> > -- >> > 2.9.4 >> > >> > _______________________________________________ >> > mesa-dev mailing list >> > [email protected] >> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
