On Thu, Apr 7, 2016 at 3:27 AM, Grigori Goronzy <g...@chown.ath.cx> wrote: > With the previous changes to handling of viewport clipping, it is > almost trivial to add proper support for guard band clipping. Select a > suitable integer clipping value to keep inside the rasterizer's guard > band range of [-32768, 32767] and program the hardware to use guard > band clipping. > > Guard band clipping speeds up rasterization for primitives that are > partially off-screen. This change in particular results in small > framerate improvements in a wide range of games. > > v2: the rasterizer doesn't clamp coordinates, so use coordinates > before viewport clamping to determine guardband clipping. > --- > > So, how about this? I don't like the introduction of the new struct, > but this seems to be cleaner than v1 anyway. > > src/gallium/drivers/radeonsi/si_state.c | 83 > +++++++++++++++++++++++---------- > src/gallium/drivers/radeonsi/si_state.h | 7 +++ > 2 files changed, 66 insertions(+), 24 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_state.c > b/src/gallium/drivers/radeonsi/si_state.c > index 0bb41e5..a9a58e8 100644 > --- a/src/gallium/drivers/radeonsi/si_state.c > +++ b/src/gallium/drivers/radeonsi/si_state.c > @@ -838,39 +838,53 @@ static void si_set_scissor_states(struct pipe_context > *ctx, > si_mark_atom_dirty(sctx, &sctx->scissors.atom); > } > > +static void si_mix_scissor(struct si_viewport_scissor *out,
si_max_scissor is a better name. > + struct si_viewport_scissor *mix) > +{ > + out->minx = MIN2(out->minx, mix->minx); > + out->miny = MIN2(out->miny, mix->miny); > + out->maxx = MAX2(out->maxx, mix->maxx); > + out->maxy = MAX2(out->maxy, mix->maxy); > +} > + > static void si_get_scissor_from_viewport(struct pipe_viewport_state *vp, > - struct pipe_scissor_state *scissor) > + struct si_viewport_scissor *scissor) > { > int tmp; > > /* Convert (-1, -1) and (1, 1) from clip space into window space. */ > - int minx = (int)(-vp->scale[0] + vp->translate[0]); > - int miny = (int)(-vp->scale[1] + vp->translate[1]); > - int maxx = (int)(vp->scale[0] + vp->translate[0]); > - int maxy = (int)(vp->scale[1] + vp->translate[1]); > + scissor->minx = (int)(-vp->scale[0] + vp->translate[0]); > + scissor->miny = (int)(-vp->scale[1] + vp->translate[1]); > + scissor->maxx = (int)(vp->scale[0] + vp->translate[0]); > + scissor->maxy = (int)(vp->scale[1] + vp->translate[1]); > > /* r600_draw_rectangle sets this. Disable the scissor. */ > - if (minx == -1 && miny == -1 && maxx == 1 && maxy == 1) { > - minx = miny = 0; > - maxx = maxy = 16384; > + if (scissor->minx == -1 && scissor->miny == -1 && > + scissor->maxx == 1 && scissor->maxy == 1) { > + scissor->minx = scissor->miny = 0; > + scissor->maxx = scissor->maxy = 16384; > } > > /* Handle inverted viewports. */ > - if (minx > maxx) { > - tmp = minx; > - minx = maxx; > - maxx = tmp; > + if (scissor->minx > scissor->maxx) { > + tmp = scissor->minx; > + scissor->minx = scissor->maxx; > + scissor->maxx = tmp; > } > - if (miny > maxy) { > - tmp = miny; > - miny = maxy; > - maxy = tmp; > + if (scissor->miny > scissor->maxy) { > + tmp = scissor->miny; > + scissor->miny = scissor->maxy; > + scissor->maxy = tmp; > } > +} > > - scissor->minx = CLAMP(minx, 0, 16384); > - scissor->miny = CLAMP(miny, 0, 16384); > - scissor->maxx = CLAMP(maxx, 0, 16384); > - scissor->maxy = CLAMP(maxy, 0, 16384); > +static void si_clamp_scissor(struct pipe_scissor_state *out, > + struct si_viewport_scissor *scissor) > +{ > + out->minx = CLAMP(scissor->minx, 0, 16384); > + out->miny = CLAMP(scissor->miny, 0, 16384); > + out->maxx = CLAMP(scissor->maxx, 0, 16384); > + out->maxy = CLAMP(scissor->maxy, 0, 16384); > } > > static void si_clip_scissor(struct pipe_scissor_state *out, > @@ -884,14 +898,18 @@ static void si_clip_scissor(struct pipe_scissor_state > *out, > > static void si_emit_one_scissor(struct radeon_winsys_cs *cs, > struct pipe_viewport_state *vp, > - struct pipe_scissor_state *scissor) > + struct pipe_scissor_state *scissor, > + struct si_viewport_scissor *mix) > { > + struct si_viewport_scissor vp_scissor; > struct pipe_scissor_state final; > > /* Since the guard band disables clipping, we have to clip per-pixel > * using a scissor. > */ > - si_get_scissor_from_viewport(vp, &final); > + si_get_scissor_from_viewport(vp, &vp_scissor); > + si_mix_scissor(mix, &vp_scissor); > + si_clamp_scissor(&final, &vp_scissor); > > if (scissor) > si_clip_scissor(&final, scissor); > @@ -903,19 +921,35 @@ static void si_emit_one_scissor(struct radeon_winsys_cs > *cs, > S_028254_BR_Y(final.maxy)); > } > > +static void si_emit_guardband(struct si_context *sctx, > + struct si_viewport_scissor *scissor) > +{ > + struct radeon_winsys_cs *cs = sctx->b.gfx.cs; > + > + int width = scissor->maxx - scissor->minx; > + int height = scissor->maxy - scissor->miny; > + float guardband_x = width ? (32768 / width) : 1.0; > + float guardband_y = height ? (32768 / height) : 1.0; This doesn't look correct. Example: viewport extents = [-32768, -32767]. The width is 1, but the distance from the limit is 0. Thus, the guard band should be 1.0, not 32768. Please use a #define for all occurences of 32768. > + > + radeon_set_context_reg(cs, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, > fui(guardband_x)); > + radeon_set_context_reg(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, > fui(guardband_y)); > +} > + > static void si_emit_scissors(struct si_context *sctx, struct r600_atom *atom) > { > struct radeon_winsys_cs *cs = sctx->b.gfx.cs; > struct pipe_scissor_state *states = sctx->scissors.states; > unsigned mask = sctx->scissors.dirty_mask; > bool scissor_enable = sctx->queued.named.rasterizer->scissor_enable; > + struct si_viewport_scissor max_clip = {32768, 32768, -32768, -32768}; > > /* The simple case: Only 1 viewport is active. */ > if (mask & 1 && > !si_get_vs_info(sctx)->writes_viewport_index) { > radeon_set_context_reg_seq(cs, > R_028250_PA_SC_VPORT_SCISSOR_0_TL, 2); > si_emit_one_scissor(cs, &sctx->viewports.states[0], > - scissor_enable ? &states[0] : NULL); > + scissor_enable ? &states[0] : NULL, > &max_clip); > + si_emit_guardband(sctx, &max_clip); > sctx->scissors.dirty_mask &= ~1; /* clear one bit */ > return; > } > @@ -929,9 +963,10 @@ static void si_emit_scissors(struct si_context *sctx, > struct r600_atom *atom) > start * 4 * 2, count * 2); > for (i = start; i < start+count; i++) { > si_emit_one_scissor(cs, &sctx->viewports.states[i], > - scissor_enable ? &states[i] : > NULL); > + scissor_enable ? &states[i] : > NULL, &max_clip); This needs to look at all viewports, not just the dirty ones. > } > } > + si_emit_guardband(sctx, &max_clip); > sctx->scissors.dirty_mask = 0; > } > > diff --git a/src/gallium/drivers/radeonsi/si_state.h > b/src/gallium/drivers/radeonsi/si_state.h > index 2a63279..d1b4c52 100644 > --- a/src/gallium/drivers/radeonsi/si_state.h > +++ b/src/gallium/drivers/radeonsi/si_state.h > @@ -219,6 +219,13 @@ struct si_buffer_resources { > struct pipe_resource **buffers; /* this has num_buffers > elements */ > }; > > +struct si_viewport_scissor { > + int minx; > + int miny; > + int maxx; > + int maxy; > +}; Please rename the structure to si_signed_scissor to make clear what it really is. Marek _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev