On Friday, July 14, 2017 11:02:57 AM PDT you wrote: > Add the code into its own function and atom, since almost nothing is > shared with GEN >= 6. > > v2: Split GEN <=5 and GEN >= 6 into separate functions (Ken). > > Signed-off-by: Rafael Antognolli <[email protected]> > Cc: Kenneth Graunke <[email protected]> > --- > src/mesa/drivers/dri/i965/Makefile.sources | 1 - > src/mesa/drivers/dri/i965/brw_clip_state.c | 147 > -------------------------- > src/mesa/drivers/dri/i965/brw_structs.h | 65 ------------ > src/mesa/drivers/dri/i965/genX_state_upload.c | 103 +++++++++++++++++- > 4 files changed, 101 insertions(+), 215 deletions(-) > delete mode 100644 src/mesa/drivers/dri/i965/brw_clip_state.c > > diff --git a/src/mesa/drivers/dri/i965/Makefile.sources > b/src/mesa/drivers/dri/i965/Makefile.sources > index fc46645..60a41f8 100644 > --- a/src/mesa/drivers/dri/i965/Makefile.sources > +++ b/src/mesa/drivers/dri/i965/Makefile.sources > @@ -6,7 +6,6 @@ i965_FILES = \ > brw_bufmgr.h \ > brw_clear.c \ > brw_clip.c \ > - brw_clip_state.c \ > brw_compute.c \ > brw_conditional_render.c \ > brw_context.c \ > diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c > b/src/mesa/drivers/dri/i965/brw_clip_state.c > deleted file mode 100644 > index 8f22c0f..0000000 > --- a/src/mesa/drivers/dri/i965/brw_clip_state.c > +++ /dev/null > @@ -1,147 +0,0 @@ > -/* > - Copyright (C) Intel Corp. 2006. All Rights Reserved. > - Intel funded Tungsten Graphics to > - develop this 3D driver. > - > - Permission is hereby granted, free of charge, to any person obtaining > - a copy of this software and associated documentation files (the > - "Software"), to deal in the Software without restriction, including > - without limitation the rights to use, copy, modify, merge, publish, > - distribute, sublicense, and/or sell copies of the Software, and to > - permit persons to whom the Software is furnished to do so, subject to > - the following conditions: > - > - The above copyright notice and this permission notice (including the > - next paragraph) shall be included in all copies or substantial > - portions of the Software. > - > - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, > - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. > - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE > - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION > - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION > - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. > - > - **********************************************************************/ > - /* > - * Authors: > - * Keith Whitwell <[email protected]> > - */ > - > -#include "intel_batchbuffer.h" > -#include "brw_context.h" > -#include "brw_state.h" > -#include "brw_defines.h" > -#include "main/framebuffer.h" > - > -static void > -brw_upload_clip_unit(struct brw_context *brw) > -{ > - struct gl_context *ctx = &brw->ctx; > - struct brw_clip_unit_state *clip; > - > - clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset); > - memset(clip, 0, sizeof(*clip)); > - > - /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_CLIP_PROG_DATA */ > - clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) / > - 16 - 1); > - clip->thread0.kernel_start_pointer = > - brw_program_reloc(brw, > - brw->clip.state_offset + > - offsetof(struct brw_clip_unit_state, thread0), > - brw->clip.prog_offset + > - (clip->thread0.grf_reg_count << 1)) >> 6; > - > - clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; > - clip->thread1.single_program_flow = 1; > - > - clip->thread3.urb_entry_read_length = > brw->clip.prog_data->urb_read_length; > - clip->thread3.const_urb_entry_read_length = > - brw->clip.prog_data->curb_read_length; > - > - /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */ > - clip->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; > - clip->thread3.dispatch_grf_start_reg = 1; > - clip->thread3.urb_entry_read_offset = 0; > - > - /* BRW_NEW_URB_FENCE */ > - clip->thread4.nr_urb_entries = brw->urb.nr_clip_entries; > - clip->thread4.urb_entry_allocation_size = brw->urb.vsize - 1; > - /* If we have enough clip URB entries to run two threads, do so. > - */ > - if (brw->urb.nr_clip_entries >= 10) { > - /* Half of the URB entries go to each thread, and it has to be an > - * even number. > - */ > - assert(brw->urb.nr_clip_entries % 2 == 0); > - > - /* Although up to 16 concurrent Clip threads are allowed on Ironlake, > - * only 2 threads can output VUEs at a time. > - */ > - if (brw->gen == 5) > - clip->thread4.max_threads = 16 - 1; > - else > - clip->thread4.max_threads = 2 - 1; > - } else { > - assert(brw->urb.nr_clip_entries >= 5); > - clip->thread4.max_threads = 1 - 1; > - } > - > - /* _NEW_TRANSFORM */ > - if (brw->gen == 5 || brw->is_g4x) > - clip->clip5.userclip_enable_flags = ctx->Transform.ClipPlanesEnabled; > - else > - /* Up to 6 actual clip flags, plus the 7th for negative RHW > workaround. */ > - clip->clip5.userclip_enable_flags = (ctx->Transform.ClipPlanesEnabled > & 0x3f) | 0x40; > - > - clip->clip5.userclip_must_clip = 1; > - > - /* enable guardband clipping if we can */ > - clip->clip5.guard_band_enable = 1; > - clip->clip6.clipper_viewport_state_ptr = > - (brw->batch.bo->offset64 + brw->clip.vp_offset) >> 5; > - > - /* emit clip viewport relocation */ > - brw_emit_reloc(&brw->batch, > - (brw->clip.state_offset + > - offsetof(struct brw_clip_unit_state, clip6)), > - brw->batch.bo, brw->clip.vp_offset, > - I915_GEM_DOMAIN_INSTRUCTION, 0); > - > - /* _NEW_TRANSFORM */ > - if (!ctx->Transform.DepthClamp) > - clip->clip5.viewport_z_clip_enable = 1; > - clip->clip5.viewport_xy_clip_enable = 1; > - clip->clip5.vertex_position_space = BRW_CLIP_NDCSPACE; > - if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE) > - clip->clip5.api_mode = BRW_CLIP_API_DX; > - else > - clip->clip5.api_mode = BRW_CLIP_API_OGL; > - clip->clip5.clip_mode = brw->clip.prog_data->clip_mode; > - > - if (brw->is_g4x) > - clip->clip5.negative_w_clip_test = 1; > - > - clip->viewport_xmin = -1; > - clip->viewport_xmax = 1; > - clip->viewport_ymin = -1; > - clip->viewport_ymax = 1; > - > - brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; > -} > - > -const struct brw_tracked_state brw_clip_unit = { > - .dirty = { > - .mesa = _NEW_TRANSFORM | > - _NEW_VIEWPORT, > - .brw = BRW_NEW_BATCH | > - BRW_NEW_BLORP | > - BRW_NEW_CLIP_PROG_DATA | > - BRW_NEW_PUSH_CONSTANT_ALLOCATION | > - BRW_NEW_PROGRAM_CACHE | > - BRW_NEW_URB_FENCE, > - }, > - .emit = brw_upload_clip_unit, > -}; > diff --git a/src/mesa/drivers/dri/i965/brw_structs.h > b/src/mesa/drivers/dri/i965/brw_structs.h > index 6feab0d..5a0d91d 100644 > --- a/src/mesa/drivers/dri/i965/brw_structs.h > +++ b/src/mesa/drivers/dri/i965/brw_structs.h > @@ -115,71 +115,6 @@ struct thread3 > unsigned pad3:1; > }; > > - > - > -struct brw_clip_unit_state > -{ > - struct thread0 thread0; > - struct > - { > - unsigned pad0:7; > - unsigned sw_exception_enable:1; > - unsigned pad1:3; > - unsigned mask_stack_exception_enable:1; > - unsigned pad2:1; > - unsigned illegal_op_exception_enable:1; > - unsigned pad3:2; > - unsigned floating_point_mode:1; > - unsigned thread_priority:1; > - unsigned binding_table_entry_count:8; > - unsigned pad4:5; > - unsigned single_program_flow:1; > - } thread1; > - > - struct thread2 thread2; > - struct thread3 thread3; > - > - struct > - { > - unsigned pad0:9; > - unsigned gs_output_stats:1; /* not always */ > - unsigned stats_enable:1; > - unsigned nr_urb_entries:7; > - unsigned pad1:1; > - unsigned urb_entry_allocation_size:5; > - unsigned pad2:1; > - unsigned max_threads:5; /* may be less */ > - unsigned pad3:2; > - } thread4; > - > - struct > - { > - unsigned pad0:13; > - unsigned clip_mode:3; > - unsigned userclip_enable_flags:8; > - unsigned userclip_must_clip:1; > - unsigned negative_w_clip_test:1; > - unsigned guard_band_enable:1; > - unsigned viewport_z_clip_enable:1; > - unsigned viewport_xy_clip_enable:1; > - unsigned vertex_position_space:1; > - unsigned api_mode:1; > - unsigned pad2:1; > - } clip5; > - > - struct > - { > - unsigned pad0:5; > - unsigned clipper_viewport_state_ptr:27; > - } clip6; > - > - > - float viewport_xmin; > - float viewport_xmax; > - float viewport_ymin; > - float viewport_ymax; > -}; > - > struct brw_wm_unit_state > { > struct thread0 thread0; > diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c > b/src/mesa/drivers/dri/i965/genX_state_upload.c > index 54a547c..b71ca46 100644 > --- a/src/mesa/drivers/dri/i965/genX_state_upload.c > +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c > @@ -1268,7 +1268,106 @@ static const struct brw_tracked_state > genX(depth_stencil_state) = { > > /* ---------------------------------------------------------------------- */ > > -#if GEN_GEN >= 6 > +#if GEN_GEN <= 5 > + > +static void > +genX(upload_clip_state)(struct brw_context *brw) > +{ > + struct gl_context *ctx = &brw->ctx; > + > + ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; > + brw_state_emit(brw, GENX(CLIP_STATE), 32, &brw->clip.state_offset, clip) { > + clip.KernelStartPointer = KSP_ro(brw, brw->clip.prog_offset); > + clip.GRFRegisterCount = > + DIV_ROUND_UP(brw->clip.prog_data->total_grf, 16) - 1; > + clip.FloatingPointMode = FLOATING_POINT_MODE_Alternate; > + clip.SingleProgramFlow = true; > + clip.VertexURBEntryReadLength = brw->clip.prog_data->urb_read_length; > + clip.ConstantURBEntryReadLength = > brw->clip.prog_data->curb_read_length; > + > + /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */ > + clip.ConstantURBEntryReadOffset = brw->curbe.clip_start * 2; > + clip.DispatchGRFStartRegisterForURBData = 1; > + clip.VertexURBEntryReadOffset = 0; > + > + /* BRW_NEW_URB_FENCE */ > + clip.NumberofURBEntries = brw->urb.nr_clip_entries; > + clip.URBEntryAllocationSize = brw->urb.vsize - 1; > + > + if (brw->urb.nr_clip_entries >= 10) { > + /* Half of the URB entries go to each thread, and it has to be an > + * even number. > + */ > + assert(brw->urb.nr_clip_entries % 2 == 0); > + > + /* Although up to 16 concurrent Clip threads are allowed on > Ironlake, > + * only 2 threads can output VUEs at a time. > + */ > + clip.MaximumNumberofThreads = GEN_GEN == 5 ? 16 - 1 : 2 - 1;
Could we write this as:
clip.MaximumNumberofThreads = (GEN_GEN == 5 ? 16 : 2) - 1;
I think it might be a little easier to read - 16 threads on Gen5,
2 threads on Gen4/G45, and minus 1 because of the encoding.
> + } else {
> + assert(brw->urb.nr_clip_entries >= 5);
> + clip.MaximumNumberofThreads = 1 - 1;
> + }
> +
> + clip.VertexPositionSpace = VPOS_NDCSPACE;
> + clip.UserClipFlagsMustClipEnable = true;
> + clip.GuardbandClipTestEnable = true;
> +
> + clip.ClipperViewportStatePointer =
> + instruction_ro_bo(brw->batch.bo, brw->clip.vp_offset);
> +
> + clip.ScreenSpaceViewportXMin = -1;
> + clip.ScreenSpaceViewportXMax = 1;
> + clip.ScreenSpaceViewportYMin = -1;
> + clip.ScreenSpaceViewportYMax = 1;
> +
> + clip.ViewportZClipTestEnable = !ctx->Transform.DepthClamp;
> +
> + /* _NEW_TRANSFORM */
> + if (GEN_GEN == 5 || GEN_IS_G4X) {
> + clip.UserClipDistanceClipTestEnableBitmask =
> + ctx->Transform.ClipPlanesEnabled;
> + } else {
Let's keep the comment - mentioning negative RHW is useful so people know
why we're doing this crazy thing.
/* Up to 6 actual clip flags, plus the 7th for the negative RHW
* workaround.
*/
> + clip.UserClipDistanceClipTestEnableBitmask =
> + (ctx->Transform.ClipPlanesEnabled & 0x3f) | 0x40;
> + }
> +
> + if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE)
> + clip.APIMode = APIMODE_D3D;
> + else
> + clip.APIMode = APIMODE_OGL;
> +
> + clip.GuardbandClipTestEnable = true;
> +
> + clip.ClipMode = brw->clip.prog_data->clip_mode;
> +
> +#if GEN_IS_G4X
> + clip.NegativeWClipTestEnable = true;
> +#endif
> +
> + /* _NEW_POLYGON,
> + * BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TES_PROG_DATA | BRW_NEW_PRIMITIVE
> + */
Looks like this comment got imported from future gens...setting something
to "true" definitely doesn't have any state dependencies...and we don't
have tessellation shaders on Gen4-5...nor even real GS enabled. Let's
just drop the comment.
Thanks, this looks great!
Reviewed-by: Kenneth Graunke <[email protected]>
> + clip.ViewportXYClipTestEnable = true;
> + }
> +}
> +
> +const struct brw_tracked_state genX(clip_state) = {
> + .dirty = {
> + .mesa = _NEW_TRANSFORM |
> + _NEW_VIEWPORT,
> + .brw = BRW_NEW_BATCH |
> + BRW_NEW_BLORP |
> + BRW_NEW_CLIP_PROG_DATA |
> + BRW_NEW_PUSH_CONSTANT_ALLOCATION |
> + BRW_NEW_PROGRAM_CACHE |
> + BRW_NEW_URB_FENCE,
> + },
> + .emit = genX(upload_clip_state),
> +};
> +
> +#else
> +
> static void
> genX(upload_clip_state)(struct brw_context *brw)
> {
> @@ -5123,7 +5222,7 @@ genX(init_atoms)(struct brw_context *brw)
> &genX(sf_clip_viewport),
> &genX(sf_state),
> &genX(vs_state), /* always required, enabled or not */
> - &brw_clip_unit,
> + &genX(clip_state),
> &genX(gs_state),
>
> /* Command packets:
>
signature.asc
Description: This is a digitally signed message part.
_______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
