On Sun, Sep 9, 2012 at 1:03 AM, Marek Olšák <[email protected]> wrote: > Based on the patch called "simplify and fix flushing and synchronization" > by Jerome Glisse. > > Rebased, removed unneeded code, simplified more and cleaned up. > > Also, SH_ACTION_ENA is not set when changing shaders (hw doesn't seem > to need it). It's only used to flush constant buffers.
Looks good, still would like to do some stress testing will try to do that today. Reviewed-by: Jerome Glisse <[email protected]> > --- > src/gallium/drivers/r600/evergreen_compute.c | 20 +- > .../drivers/r600/evergreen_compute_internal.c | 4 +- > src/gallium/drivers/r600/evergreen_state.c | 7 +- > src/gallium/drivers/r600/evergreend.h | 7 +- > src/gallium/drivers/r600/r600.h | 18 +- > src/gallium/drivers/r600/r600_hw_context.c | 218 > +++++++++++++------- > src/gallium/drivers/r600/r600_hw_context_priv.h | 3 +- > src/gallium/drivers/r600/r600_pipe.c | 2 - > src/gallium/drivers/r600/r600_pipe.h | 4 - > src/gallium/drivers/r600/r600_state.c | 21 +- > src/gallium/drivers/r600/r600_state_common.c | 76 ++----- > src/gallium/drivers/r600/r600d.h | 12 ++ > 12 files changed, 210 insertions(+), 182 deletions(-) > > diff --git a/src/gallium/drivers/r600/evergreen_compute.c > b/src/gallium/drivers/r600/evergreen_compute.c > index 3533312..1fb63d6 100644 > --- a/src/gallium/drivers/r600/evergreen_compute.c > +++ b/src/gallium/drivers/r600/evergreen_compute.c > @@ -96,7 +96,7 @@ static void evergreen_cs_set_vertex_buffer( > vb->buffer = buffer; > vb->user_buffer = NULL; > > - r600_inval_vertex_cache(rctx); > + rctx->flags |= rctx->has_vertex_cache ? 
R600_CONTEXT_VTX_FLUSH : > R600_CONTEXT_TEX_FLUSH; > state->enabled_mask |= 1 << vb_index; > state->dirty_mask |= 1 << vb_index; > r600_atom_dirty(rctx, &state->atom); > @@ -332,8 +332,11 @@ static void compute_emit_cs(struct r600_context *ctx, > const uint *block_layout, > */ > r600_emit_atom(ctx, &ctx->start_compute_cs_cmd.atom); > > + ctx->flags |= R600_CONTEXT_CB_FLUSH; > + r600_flush_emit(ctx); > + > /* Emit cb_state */ > - cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER]; > + cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER]; > r600_context_pipe_state_emit(ctx, cb_state, > RADEON_CP_PACKET3_COMPUTE_MODE); > > /* Set CB_TARGET_MASK XXX: Use cb_misc_state */ > @@ -384,15 +387,10 @@ static void compute_emit_cs(struct r600_context *ctx, > const uint *block_layout, > /* Emit dispatch state and dispatch packet */ > evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout); > > - /* r600_flush_framebuffer() updates the cb_flush_flags and then > - * calls r600_emit_atom() on the ctx->surface_sync_cmd.atom, which > emits > - * a SURFACE_SYNC packet via r600_emit_surface_sync(). > - * > - * XXX r600_emit_surface_sync() hardcodes the CP_COHER_SIZE to > - * 0xffffffff, so we will need to add a field to struct > - * r600_surface_sync_cmd if we want to manually set this value. 
> + /* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to > 0xffffffff > */ > - r600_flush_framebuffer(ctx, true /* Flush now */); > + ctx->flags |= R600_CONTEXT_CB_FLUSH; > + r600_flush_emit(ctx); > > #if 0 > COMPUTE_DBG("cdw: %i\n", cs->cdw); > @@ -444,7 +442,7 @@ void evergreen_emit_cs_shader( > r600_write_value(cs, r600_context_bo_reloc(rctx, > shader->shader_code_bo, > RADEON_USAGE_READ)); > > - r600_inval_shader_cache(rctx); > + rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH; > } > > static void evergreen_launch_grid( > diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c > b/src/gallium/drivers/r600/evergreen_compute_internal.c > index 50a60d3..dc95732 100644 > --- a/src/gallium/drivers/r600/evergreen_compute_internal.c > +++ b/src/gallium/drivers/r600/evergreen_compute_internal.c > @@ -562,7 +562,7 @@ void evergreen_set_tex_resource( > > util_format_get_blockwidth(tmp->resource.b.b.format) * > view->base.texture->width0*height*depth; > > - r600_inval_texture_cache(pipe->ctx); > + pipe->ctx->flags |= R600_CONTEXT_TEX_FLUSH; > > evergreen_emit_force_reloc(res); > evergreen_emit_force_reloc(res); > @@ -621,7 +621,7 @@ void evergreen_set_const_cache( > res->usage = RADEON_USAGE_READ; > res->coher_bo_size = size; > > - r600_inval_shader_cache(pipe->ctx); > + pipe->ctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH; > } > > struct r600_resource* r600_compute_buffer_alloc_vram( > diff --git a/src/gallium/drivers/r600/evergreen_state.c > b/src/gallium/drivers/r600/evergreen_state.c > index 9a5183e..2a7a35f 100644 > --- a/src/gallium/drivers/r600/evergreen_state.c > +++ b/src/gallium/drivers/r600/evergreen_state.c > @@ -1697,7 +1697,12 @@ static void evergreen_set_framebuffer_state(struct > pipe_context *ctx, > if (rstate == NULL) > return; > > - r600_flush_framebuffer(rctx, false); > + if (rctx->framebuffer.nr_cbufs) { > + rctx->flags |= R600_CONTEXT_CB_FLUSH; > + } > + if (rctx->framebuffer.zsbuf) { > + rctx->flags |= R600_CONTEXT_DB_FLUSH; > + } > > 
/* unreference old buffer and reference new one */ > rstate->id = R600_PIPE_STATE_FRAMEBUFFER; > diff --git a/src/gallium/drivers/r600/evergreend.h > b/src/gallium/drivers/r600/evergreend.h > index e4d72f5..18e1eb7 100644 > --- a/src/gallium/drivers/r600/evergreend.h > +++ b/src/gallium/drivers/r600/evergreend.h > @@ -46,7 +46,8 @@ > #define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 > #define EVENT_TYPE_ZPASS_DONE 0x15 > #define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT 0x16 > -#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f > +#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH 0x1f > +#define EVENT_TYPE_FLUSH_AND_INV_DB_META 0x2c > > #define EVENT_TYPE(x) ((x) << 0) > #define EVENT_INDEX(x) ((x) << 8) > @@ -2186,16 +2187,12 @@ > #define C_0085F0_DB_DEST_BASE_ENA 0xFFFFBFFF > #define S_0085F0_CB8_DEST_BASE_ENA(x) (((x) & 0x1) << 15) > #define G_0085F0_CB8_DEST_BASE_ENA(x) (((x) >> 15) & 0x1) > - > #define S_0085F0_CB9_DEST_BASE_ENA(x) (((x) & 0x1) << 16) > #define G_0085F0_CB9_DEST_BASE_ENA(x) (((x) >> 16) & 0x1) > - > #define S_0085F0_CB10_DEST_BASE_ENA(x) (((x) & 0x1) << 17) > #define G_0085F0_CB10_DEST_BASE_ENA(x) (((x) >> 17) & 0x1) > - > #define S_0085F0_CB11_DEST_BASE_ENA(x) (((x) & 0x1) << 18) > #define G_0085F0_CB11_DEST_BASE_ENA(x) (((x) >> 18) & 0x1) > - > #define S_0085F0_TC_ACTION_ENA(x) (((x) & 0x1) << 23) > #define G_0085F0_TC_ACTION_ENA(x) (((x) >> 23) & 0x1) > #define C_0085F0_TC_ACTION_ENA 0xFF7FFFFF > diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h > index 1c8bd24..b6425cd 100644 > --- a/src/gallium/drivers/r600/r600.h > +++ b/src/gallium/drivers/r600/r600.h > @@ -180,9 +180,16 @@ struct r600_so_target { > unsigned so_index; > }; > > -#define R600_CONTEXT_DRAW_PENDING (1 << 0) > -#define R600_CONTEXT_DST_CACHES_DIRTY (1 << 1) > -#define R600_PARTIAL_FLUSH (1 << 2) > +#define R600_CONTEXT_PS_PARTIAL_FLUSH (1 << 0) > +#define R600_CONTEXT_CB_FLUSH (1 << 1) > +#define R600_CONTEXT_DB_FLUSH (1 << 2) > +#define R600_CONTEXT_SHADERCONST_FLUSH (1 << 
3) > +#define R600_CONTEXT_TEX_FLUSH (1 << 4) > +#define R600_CONTEXT_VTX_FLUSH (1 << 5) > +#define R600_CONTEXT_STREAMOUT_FLUSH (1 << 6) > +#define R600_CONTEXT_WAIT_IDLE (1 << 7) > +#define R600_CONTEXT_FLUSH_AND_INV (1 << 8) > +#define R600_CONTEXT_HTILE_ERRATA (1 << 9) > > struct r600_context; > struct r600_screen; > @@ -196,10 +203,7 @@ void r600_context_flush(struct r600_context *ctx, > unsigned flags); > > void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource > *fence, > unsigned offset, unsigned value); > -void r600_inval_shader_cache(struct r600_context *ctx); > -void r600_inval_texture_cache(struct r600_context *ctx); > -void r600_inval_vertex_cache(struct r600_context *ctx); > -void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now); > +void r600_flush_emit(struct r600_context *ctx); > > void r600_context_streamout_begin(struct r600_context *ctx); > void r600_context_streamout_end(struct r600_context *ctx); > diff --git a/src/gallium/drivers/r600/r600_hw_context.c > b/src/gallium/drivers/r600/r600_hw_context.c > index 0ec13e5..d40f6b6 100644 > --- a/src/gallium/drivers/r600/r600_hw_context.c > +++ b/src/gallium/drivers/r600/r600_hw_context.c > @@ -114,19 +114,6 @@ err: > return; > } > > -void r600_context_ps_partial_flush(struct r600_context *ctx) > -{ > - struct radeon_winsys_cs *cs = ctx->cs; > - > - if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING)) > - return; > - > - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); > - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | > EVENT_INDEX(4); > - > - ctx->flags &= ~R600_CONTEXT_DRAW_PENDING; > -} > - > static void r600_init_block(struct r600_context *ctx, > struct r600_block *block, > const struct r600_reg *reg, int index, int nreg, > @@ -665,7 +652,7 @@ void r600_need_cs_space(struct r600_context *ctx, > unsigned num_dw, > } > > /* Count in framebuffer cache flushes at the end of CS. 
*/ > - num_dw += 7; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) > */ > + num_dw += 44; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) > */ > > /* Save 16 dwords for the fence mechanism. */ > num_dw += 16; > @@ -693,7 +680,7 @@ void r600_context_dirty_block(struct r600_context *ctx, > LIST_ADDTAIL(&block->list,&ctx->dirty); > > if (block->flags & REG_FLAG_FLUSH_CHANGE) { > - r600_context_ps_partial_flush(ctx); > + ctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; > } > } > } > @@ -861,54 +848,138 @@ out: > LIST_DELINIT(&block->list); > } > > -void r600_inval_shader_cache(struct r600_context *ctx) > +void r600_flush_emit(struct r600_context *rctx) > { > - ctx->surface_sync_cmd.flush_flags |= S_0085F0_SH_ACTION_ENA(1); > - r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom); > -} > + struct radeon_winsys_cs *cs = rctx->cs; > > -void r600_inval_texture_cache(struct r600_context *ctx) > -{ > - ctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1); > - r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom); > -} > + if (!rctx->flags) { > + return; > + } > > -void r600_inval_vertex_cache(struct r600_context *ctx) > -{ > - if (ctx->has_vertex_cache) { > - ctx->surface_sync_cmd.flush_flags |= > S_0085F0_VC_ACTION_ENA(1); > - } else { > - /* Some GPUs don't have the vertex cache and must use the > texture cache instead. 
*/ > - ctx->surface_sync_cmd.flush_flags |= > S_0085F0_TC_ACTION_ENA(1); > + if (rctx->flags & R600_CONTEXT_PS_PARTIAL_FLUSH) { > + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); > + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) > | EVENT_INDEX(4); > } > - r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom); > -} > > -void r600_flush_framebuffer(struct r600_context *ctx, bool flush_now) > -{ > - if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY)) > - return; > + if (rctx->flags & R600_CONTEXT_FLUSH_AND_INV) { > + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); > + cs->buf[cs->cdw++] = > EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) | EVENT_INDEX(0); > + > + /* DB flushes are special due to errata with hyperz, we need > to > + * insert a no-op, so that the cache has time to really flush. > + */ > + if (rctx->chip_class <= R700 && > + rctx->flags & R600_CONTEXT_HTILE_ERRATA) { > + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 31, 0); > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + 
cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + cs->buf[cs->cdw++] = 0xdeadcafe; > + } > + } > > - ctx->surface_sync_cmd.flush_flags |= > - r600_get_cb_flush_flags(ctx) | > - (ctx->framebuffer.zsbuf ? S_0085F0_DB_ACTION_ENA(1) | > S_0085F0_DB_DEST_BASE_ENA(1) : 0); > + if (rctx->flags & (R600_CONTEXT_CB_FLUSH | > + R600_CONTEXT_DB_FLUSH | > + R600_CONTEXT_SHADERCONST_FLUSH | > + R600_CONTEXT_TEX_FLUSH | > + R600_CONTEXT_VTX_FLUSH | > + R600_CONTEXT_STREAMOUT_FLUSH)) { > + /* anything left (cb, vtx, shader, streamout) can be flushed > + * using the surface sync packet > + */ > + unsigned flags = 0; > + > + if (rctx->flags & R600_CONTEXT_CB_FLUSH) { > + flags |= S_0085F0_CB_ACTION_ENA(1) | > + S_0085F0_CB0_DEST_BASE_ENA(1) | > + S_0085F0_CB1_DEST_BASE_ENA(1) | > + S_0085F0_CB2_DEST_BASE_ENA(1) | > + S_0085F0_CB3_DEST_BASE_ENA(1) | > + S_0085F0_CB4_DEST_BASE_ENA(1) | > + S_0085F0_CB5_DEST_BASE_ENA(1) | > + S_0085F0_CB6_DEST_BASE_ENA(1) | > + S_0085F0_CB7_DEST_BASE_ENA(1); > + > + if (rctx->chip_class >= EVERGREEN) { > + flags |= S_0085F0_CB8_DEST_BASE_ENA(1) | > + S_0085F0_CB9_DEST_BASE_ENA(1) | > + S_0085F0_CB10_DEST_BASE_ENA(1) | > + S_0085F0_CB11_DEST_BASE_ENA(1); > + } > > - if (flush_now) { > - r600_emit_atom(ctx, &ctx->surface_sync_cmd.atom); > - } else { > - r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom); > - } > + /* RV670 errata > + * (CB1_DEST_BASE_ENA is also required, which is > + * included unconditionally above). */ > + if (rctx->family == CHIP_RV670 || > + rctx->family == CHIP_RS780 || > + rctx->family == CHIP_RS880) { > + flags |= S_0085F0_DEST_BASE_0_ENA(1); > + } > + } > > - /* Also add a complete cache flush to work around broken flushing on > R6xx. 
*/ > - if (ctx->chip_class == R600) { > - if (flush_now) { > - r600_emit_atom(ctx, &ctx->r6xx_flush_and_inv_cmd); > - } else { > - r600_atom_dirty(ctx, &ctx->r6xx_flush_and_inv_cmd); > + if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) { > + flags |= S_0085F0_SO0_DEST_BASE_ENA(1) | > + S_0085F0_SO1_DEST_BASE_ENA(1) | > + S_0085F0_SO2_DEST_BASE_ENA(1) | > + S_0085F0_SO3_DEST_BASE_ENA(1) | > + S_0085F0_SMX_ACTION_ENA(1); > + > + /* RV670 errata */ > + if (rctx->family == CHIP_RV670 || > + rctx->family == CHIP_RS780 || > + rctx->family == CHIP_RS880) { > + flags |= S_0085F0_DEST_BASE_0_ENA(1); > + } > } > + > + flags |= (rctx->flags & R600_CONTEXT_DB_FLUSH) ? > S_0085F0_DB_ACTION_ENA(1) | > + > S_0085F0_DB_DEST_BASE_ENA(1): 0; > + flags |= (rctx->flags & R600_CONTEXT_SHADERCONST_FLUSH) ? > S_0085F0_SH_ACTION_ENA(1) : 0; > + flags |= (rctx->flags & R600_CONTEXT_TEX_FLUSH) ? > S_0085F0_TC_ACTION_ENA(1) : 0; > + flags |= (rctx->flags & R600_CONTEXT_VTX_FLUSH) ? > S_0085F0_VC_ACTION_ENA(1) : 0; > + > + cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); > + cs->buf[cs->cdw++] = flags; /* CP_COHER_CNTL */ > + cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ > + cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ > + cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ > + } > + > + if (rctx->flags & R600_CONTEXT_WAIT_IDLE) { > + /* wait for things to settle */ > + r600_write_config_reg(cs, R_008040_WAIT_UNTIL, > S_008040_WAIT_3D_IDLE(1)); > } > > - ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY; > + /* everything is properly flushed */ > + rctx->flags = 0; > } > > void r600_context_flush(struct r600_context *ctx, unsigned flags) > @@ -937,10 +1008,18 @@ void r600_context_flush(struct r600_context *ctx, > unsigned flags) > streamout_suspended = true; > } > > - r600_flush_framebuffer(ctx, true); > - > /* partial flush is needed to avoid lockups on some chips with user > fences */ > - r600_context_ps_partial_flush(ctx); > + ctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; > + > + /* 
flush the framebuffer */ > + ctx->flags |= R600_CONTEXT_CB_FLUSH | R600_CONTEXT_DB_FLUSH; > + > + /* R6xx errata */ > + if (ctx->chip_class == R600) { > + ctx->flags |= R600_CONTEXT_FLUSH_AND_INV; > + } > + > + r600_flush_emit(ctx); > > /* old kernels and userspace don't set SX_MISC, so we must reset it > to 0 here */ > if (ctx->chip_class <= R700) { > @@ -959,10 +1038,6 @@ void r600_context_flush(struct r600_context *ctx, > unsigned flags) > /* Begin a new CS. */ > r600_emit_atom(ctx, &ctx->start_cs_cmd.atom); > > - /* Invalidate caches. */ > - r600_inval_texture_cache(ctx); > - r600_flush_framebuffer(ctx, false); > - > /* Re-emit states. */ > r600_atom_dirty(ctx, &ctx->alphatest_state.atom); > r600_atom_dirty(ctx, &ctx->cb_misc_state.atom); > @@ -1024,7 +1099,10 @@ void r600_context_emit_fence(struct r600_context *ctx, > struct r600_resource *fen > va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo); > va = va + (offset << 2); > > - r600_context_ps_partial_flush(ctx); > + ctx->flags &= ~R600_CONTEXT_PS_PARTIAL_FLUSH; > + cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); > + cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | > EVENT_INDEX(4); > + > cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); > cs->buf[cs->cdw++] = > EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); > cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL; /* ADDRESS_LO */ > @@ -1185,7 +1263,7 @@ void r600_context_streamout_end(struct r600_context > *ctx) > { > struct radeon_winsys_cs *cs = ctx->cs; > struct r600_so_target **t = ctx->so_targets; > - unsigned i, flush_flags = 0; > + unsigned i; > uint64_t va; > > if (ctx->chip_class >= EVERGREEN) { > @@ -1212,7 +1290,6 @@ void r600_context_streamout_end(struct r600_context > *ctx) > r600_context_bo_reloc(ctx, t[i]->filled_size, > RADEON_USAGE_WRITE); > > - flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i; > } > } > > @@ -1221,22 +1298,11 @@ void r600_context_streamout_end(struct r600_context > *ctx) > } else { > 
r600_set_streamout_enable(ctx, 0); > } > + ctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH; > > - /* This is needed to fix cache flushes on r600. */ > + /* R6xx errata */ > if (ctx->chip_class == R600) { > - if (ctx->family == CHIP_RV670 || > - ctx->family == CHIP_RS780 || > - ctx->family == CHIP_RS880) { > - flush_flags |= S_0085F0_DEST_BASE_0_ENA(1); > - } > - > - r600_atom_dirty(ctx, &ctx->r6xx_flush_and_inv_cmd); > + ctx->flags |= R600_CONTEXT_FLUSH_AND_INV; > } > - > - /* Flush streamout caches. */ > - ctx->surface_sync_cmd.flush_flags |= > - S_0085F0_SMX_ACTION_ENA(1) | flush_flags; > - r600_atom_dirty(ctx, &ctx->surface_sync_cmd.atom); > - > ctx->num_cs_dw_streamout_end = 0; > } > diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h > b/src/gallium/drivers/r600/r600_hw_context_priv.h > index 8d0ebc9..73500c7 100644 > --- a/src/gallium/drivers/r600/r600_hw_context_priv.h > +++ b/src/gallium/drivers/r600/r600_hw_context_priv.h > @@ -28,7 +28,8 @@ > > #include "r600_pipe.h" > > -#define R600_MAX_DRAW_CS_DWORDS 16 > +/* the number of CS dwords for flushing and drawing */ > +#define R600_MAX_DRAW_CS_DWORDS 64 > > /* these flags are used in register flags and added into block flags */ > #define REG_FLAG_NEED_BO 1 > diff --git a/src/gallium/drivers/r600/r600_pipe.c > b/src/gallium/drivers/r600/r600_pipe.c > index 9e6c28d..d0dd4d5 100644 > --- a/src/gallium/drivers/r600/r600_pipe.c > +++ b/src/gallium/drivers/r600/r600_pipe.c > @@ -242,8 +242,6 @@ static struct pipe_context *r600_create_context(struct > pipe_screen *screen, void > rctx->context.create_video_decoder = vl_create_decoder; > rctx->context.create_video_buffer = vl_video_buffer_create; > > - r600_init_common_atoms(rctx); > - > switch (rctx->chip_class) { > case R600: > case R700: > diff --git a/src/gallium/drivers/r600/r600_pipe.h > b/src/gallium/drivers/r600/r600_pipe.h > index ff720e9..8ce8532 100644 > --- a/src/gallium/drivers/r600/r600_pipe.h > +++ b/src/gallium/drivers/r600/r600_pipe.h > @@ -370,8 
+370,6 @@ struct r600_context { > /** Compute specific registers initializations. The start_cs_cmd atom > * must be emitted before start_compute_cs_cmd. */ > struct r600_command_buffer start_compute_cs_cmd; > - struct r600_surface_sync_cmd surface_sync_cmd; > - struct r600_atom r6xx_flush_and_inv_cmd; > struct r600_alphatest_state alphatest_state; > struct r600_cb_misc_state cb_misc_state; > struct r600_db_misc_state db_misc_state; > @@ -575,8 +573,6 @@ void r600_emit_alphatest_state(struct r600_context *rctx, > struct r600_atom *atom > void r600_init_atom(struct r600_context *rctx, struct r600_atom *atom, > unsigned id, > void (*emit)(struct r600_context *ctx, struct r600_atom > *state), > unsigned num_dw); > -void r600_init_common_atoms(struct r600_context *rctx); > -unsigned r600_get_cb_flush_flags(struct r600_context *rctx); > void r600_texture_barrier(struct pipe_context *ctx); > void r600_set_index_buffer(struct pipe_context *ctx, > const struct pipe_index_buffer *ib); > diff --git a/src/gallium/drivers/r600/r600_state.c > b/src/gallium/drivers/r600/r600_state.c > index ccafdc6..7dd4148 100644 > --- a/src/gallium/drivers/r600/r600_state.c > +++ b/src/gallium/drivers/r600/r600_state.c > @@ -1600,13 +1600,23 @@ static void r600_set_framebuffer_state(struct > pipe_context *ctx, > if (rstate == NULL) > return; > > - r600_flush_framebuffer(rctx, false); > + if (rctx->framebuffer.nr_cbufs) { > + rctx->flags |= R600_CONTEXT_CB_FLUSH; > + } > + if (rctx->framebuffer.zsbuf) { > + rctx->flags |= R600_CONTEXT_DB_FLUSH; > + } > + /* R6xx errata */ > + if (rctx->chip_class == R600) { > + rctx->flags |= R600_CONTEXT_FLUSH_AND_INV; > + } > > /* unreference old buffer and reference new one */ > rstate->id = R600_PIPE_STATE_FRAMEBUFFER; > > util_copy_framebuffer_state(&rctx->framebuffer, state); > > + > /* Colorbuffers. 
*/ > rctx->export_16bpc = true; > rctx->nr_cbufs = state->nr_cbufs; > @@ -2125,14 +2135,7 @@ void r600_adjust_gprs(struct r600_context *rctx) > unsigned tmp; > int diff; > > - /* XXX: Following call moved from r600_bind_[ps|vs]_shader, > - * it seems eg+ doesn't need it, r6xx/7xx probably need it only for > - * adjusting the GPR allocation? > - * Do we need this if we aren't really changing config below? */ > - r600_inval_shader_cache(rctx); > - > - if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) > - { > + if (rctx->ps_shader->current->shader.bc.ngpr > rctx->default_ps_gprs) > { > diff = rctx->ps_shader->current->shader.bc.ngpr - > rctx->default_ps_gprs; > num_vs_gprs -= diff; > num_ps_gprs += diff; > diff --git a/src/gallium/drivers/r600/r600_state_common.c > b/src/gallium/drivers/r600/r600_state_common.c > index e67eba8..b94d1b2 100644 > --- a/src/gallium/drivers/r600/r600_state_common.c > +++ b/src/gallium/drivers/r600/r600_state_common.c > @@ -56,27 +56,6 @@ void r600_release_command_buffer(struct > r600_command_buffer *cb) > FREE(cb->buf); > } > > -static void r600_emit_surface_sync(struct r600_context *rctx, struct > r600_atom *atom) > -{ > - struct radeon_winsys_cs *cs = rctx->cs; > - struct r600_surface_sync_cmd *a = (struct r600_surface_sync_cmd*)atom; > - > - cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); > - cs->buf[cs->cdw++] = a->flush_flags; /* CP_COHER_CNTL */ > - cs->buf[cs->cdw++] = 0xffffffff; /* CP_COHER_SIZE */ > - cs->buf[cs->cdw++] = 0; /* CP_COHER_BASE */ > - cs->buf[cs->cdw++] = 0x0000000A; /* POLL_INTERVAL */ > - > - a->flush_flags = 0; > -} > - > -static void r600_emit_r6xx_flush_and_inv(struct r600_context *rctx, struct > r600_atom *atom) > -{ > - struct radeon_winsys_cs *cs = rctx->cs; > - cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0); > - cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT) > | EVENT_INDEX(0); > -} > - > void r600_init_atom(struct r600_context *rctx, > struct r600_atom 
*atom, > unsigned id, > @@ -108,37 +87,16 @@ void r600_emit_alphatest_state(struct r600_context > *rctx, struct r600_atom *atom > r600_write_context_reg(cs, R_028438_SX_ALPHA_REF, alpha_ref); > } > > -void r600_init_common_atoms(struct r600_context *rctx) > -{ > - r600_init_atom(rctx, &rctx->r6xx_flush_and_inv_cmd, 2, > r600_emit_r6xx_flush_and_inv, 2); > - r600_init_atom(rctx, &rctx->surface_sync_cmd.atom, 3, > r600_emit_surface_sync, 5); > -} > - > -unsigned r600_get_cb_flush_flags(struct r600_context *rctx) > -{ > - unsigned flags = 0; > - > - if (rctx->framebuffer.nr_cbufs) { > - flags |= S_0085F0_CB_ACTION_ENA(1) | > - (((1 << rctx->framebuffer.nr_cbufs) - 1) << > S_0085F0_CB0_DEST_BASE_ENA_SHIFT); > - } > - > - /* Workaround for broken flushing on some R6xx chipsets. */ > - if (rctx->family == CHIP_RV670 || > - rctx->family == CHIP_RS780 || > - rctx->family == CHIP_RS880) { > - flags |= S_0085F0_CB1_DEST_BASE_ENA(1) | > - S_0085F0_DEST_BASE_0_ENA(1); > - } > - return flags; > -} > - > void r600_texture_barrier(struct pipe_context *ctx) > { > struct r600_context *rctx = (struct r600_context *)ctx; > > - rctx->surface_sync_cmd.flush_flags |= S_0085F0_TC_ACTION_ENA(1) | > r600_get_cb_flush_flags(rctx); > - r600_atom_dirty(rctx, &rctx->surface_sync_cmd.atom); > + rctx->flags |= R600_CONTEXT_CB_FLUSH | R600_CONTEXT_TEX_FLUSH; > + > + /* R6xx errata */ > + if (rctx->chip_class == R600) { > + rctx->flags |= R600_CONTEXT_FLUSH_AND_INV; > + } > } > > static bool r600_conv_pipe_prim(unsigned pprim, unsigned *prim) > @@ -424,7 +382,7 @@ static void r600_bind_samplers(struct pipe_context *pipe, > } > if (sampler->border_color_use) { > dst->atom_sampler.num_dw += 11; > - rctx->flags |= R600_PARTIAL_FLUSH; > + rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; > } else { > dst->atom_sampler.num_dw += 5; > } > @@ -432,7 +390,7 @@ static void r600_bind_samplers(struct pipe_context *pipe, > } > if (rctx->chip_class <= R700 && seamless_cube_map != -1 && > seamless_cube_map != 
rctx->seamless_cube_map.enabled) { > /* change in TA_CNTL_AUX need a pipeline flush */ > - rctx->flags |= R600_PARTIAL_FLUSH; > + rctx->flags |= R600_CONTEXT_PS_PARTIAL_FLUSH; > rctx->seamless_cube_map.enabled = seamless_cube_map; > r600_atom_dirty(rctx, &rctx->seamless_cube_map.atom); > } > @@ -477,8 +435,6 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, > void *state) > > rctx->vertex_elements = v; > if (v) { > - r600_inval_shader_cache(rctx); > - > rctx->states[v->rstate.id] = &v->rstate; > r600_context_pipe_state_set(rctx, &v->rstate); > } > @@ -515,7 +471,7 @@ void r600_set_index_buffer(struct pipe_context *ctx, > void r600_vertex_buffers_dirty(struct r600_context *rctx) > { > if (rctx->vertex_buffer_state.dirty_mask) { > - r600_inval_vertex_cache(rctx); > + rctx->flags |= rctx->has_vertex_cache ? > R600_CONTEXT_VTX_FLUSH : R600_CONTEXT_TEX_FLUSH; > rctx->vertex_buffer_state.atom.num_dw = (rctx->chip_class >= > EVERGREEN ? 12 : 11) * > > util_bitcount(rctx->vertex_buffer_state.dirty_mask); > r600_atom_dirty(rctx, &rctx->vertex_buffer_state.atom); > @@ -570,7 +526,7 @@ void r600_sampler_views_dirty(struct r600_context *rctx, > struct r600_samplerview_state *state) > { > if (state->dirty_mask) { > - r600_inval_texture_cache(rctx); > + rctx->flags |= R600_CONTEXT_TEX_FLUSH; > state->atom.num_dw = (rctx->chip_class >= EVERGREEN ? 14 : > 13) * > util_bitcount(state->dirty_mask); > r600_atom_dirty(rctx, &state->atom); > @@ -898,7 +854,7 @@ void r600_delete_vs_shader(struct pipe_context *ctx, void > *state) > void r600_constant_buffers_dirty(struct r600_context *rctx, struct > r600_constbuf_state *state) > { > if (state->dirty_mask) { > - r600_inval_shader_cache(rctx); > + rctx->flags |= R600_CONTEXT_SHADERCONST_FLUSH; > state->atom.num_dw = rctx->chip_class >= EVERGREEN ? 
> util_bitcount(state->dirty_mask)*20 > : > util_bitcount(state->dirty_mask)*19; > r600_atom_dirty(rctx, &state->atom); > @@ -1148,13 +1104,6 @@ void r600_draw_vbo(struct pipe_context *ctx, const > struct pipe_draw_info *dinfo) > > r600_update_derived_state(rctx); > > - /* partial flush triggered by border color change */ > - if (rctx->flags & R600_PARTIAL_FLUSH) { > - rctx->flags &= ~R600_PARTIAL_FLUSH; > - r600_write_value(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); > - r600_write_value(cs, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) > | EVENT_INDEX(4)); > - } > - > if (info.indexed) { > /* Initialize the index buffer struct. */ > pipe_resource_reference(&ib.buffer, > rctx->index_buffer.buffer); > @@ -1221,6 +1170,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const > struct pipe_draw_info *dinfo) > > /* Emit states (the function expects that we emit at most 17 dwords > here). */ > r600_need_cs_space(rctx, 0, TRUE); > + r600_flush_emit(rctx); > > for (i = 0; i < R600_MAX_ATOM; i++) { > if (rctx->atoms[i] == NULL || !rctx->atoms[i]->dirty) { > @@ -1275,8 +1225,6 @@ void r600_draw_vbo(struct pipe_context *ctx, const > struct pipe_draw_info *dinfo) > (info.count_from_stream_output ? > S_0287F0_USE_OPAQUE(1) : 0); > } > > - rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY | > R600_CONTEXT_DRAW_PENDING; > - > /* Set the depth buffer as dirty. 
*/ > if (rctx->framebuffer.zsbuf) { > struct pipe_surface *surf = rctx->framebuffer.zsbuf; > diff --git a/src/gallium/drivers/r600/r600d.h > b/src/gallium/drivers/r600/r600d.h > index 28423e1..4bd7716 100644 > --- a/src/gallium/drivers/r600/r600d.h > +++ b/src/gallium/drivers/r600/r600d.h > @@ -3341,9 +3341,21 @@ > #define S_0085F0_DB_DEST_BASE_ENA(x) (((x) & 0x1) << 14) > #define G_0085F0_DB_DEST_BASE_ENA(x) (((x) >> 14) & 0x1) > #define C_0085F0_DB_DEST_BASE_ENA 0xFFFFBFFF > +/* r600 only start */ > #define S_0085F0_CR_DEST_BASE_ENA(x) (((x) & 0x1) << 15) > #define G_0085F0_CR_DEST_BASE_ENA(x) (((x) >> 15) & 0x1) > #define C_0085F0_CR_DEST_BASE_ENA 0xFFFF7FFF > +/* r600 only end */ > +/* evergreen only start */ > +#define S_0085F0_CB8_DEST_BASE_ENA(x) (((x) & 0x1) << 15) > +#define G_0085F0_CB8_DEST_BASE_ENA(x) (((x) >> 15) & 0x1) > +#define S_0085F0_CB9_DEST_BASE_ENA(x) (((x) & 0x1) << 16) > +#define G_0085F0_CB9_DEST_BASE_ENA(x) (((x) >> 16) & 0x1) > +#define S_0085F0_CB10_DEST_BASE_ENA(x) (((x) & 0x1) << 17) > +#define G_0085F0_CB10_DEST_BASE_ENA(x) (((x) >> 17) & 0x1) > +#define S_0085F0_CB11_DEST_BASE_ENA(x) (((x) & 0x1) << 18) > +#define G_0085F0_CB11_DEST_BASE_ENA(x) (((x) >> 18) & 0x1) > +/* evergreen only end */ > #define S_0085F0_TC_ACTION_ENA(x) (((x) & 0x1) << 23) > #define G_0085F0_TC_ACTION_ENA(x) (((x) >> 23) & 0x1) > #define C_0085F0_TC_ACTION_ENA 0xFF7FFFFF > -- > 1.7.9.5 > > _______________________________________________ > mesa-dev mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
