Reviewed-by: Marek Olšák <[email protected]> Marek
On Thu, Jan 31, 2013 at 4:38 PM, <[email protected]> wrote: > From: Jerome Glisse <[email protected]> > > We are now seeing cs that can go over the vram+gtt size; to avoid > failing, flush early any cs that goes over 70% (gtt+vram) usage. 70% > is used to allow some fragmentation. > > The idea is to compute a gross estimate of memory requirement of > each draw call. After each draw call, memory will be precisely > accounted. So the uncertainty is only on the current draw call. > In practice this gave a very good estimate (+/- 10% of the target > memory limit). > > v2: Remove left over from testing version, remove useless NULL > checking. Improve commit message. > > Signed-off-by: Jerome Glisse <[email protected]> > --- > src/gallium/drivers/r600/evergreen_state.c | 4 ++++ > src/gallium/drivers/r600/r600_hw_context.c | 12 ++++++++++++ > src/gallium/drivers/r600/r600_pipe.h | 21 +++++++++++++++++++++ > src/gallium/drivers/r600/r600_state.c | 3 +++ > src/gallium/drivers/r600/r600_state_common.c | 13 ++++++++++++- > src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 11 +++++++++++ > src/gallium/winsys/radeon/drm/radeon_winsys.h | 10 ++++++++++ > 7 files changed, 73 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/r600/evergreen_state.c > b/src/gallium/drivers/r600/evergreen_state.c > index 0a3861f..5dd8b13 100644 > --- a/src/gallium/drivers/r600/evergreen_state.c > +++ b/src/gallium/drivers/r600/evergreen_state.c > @@ -1668,6 +1668,8 @@ static void evergreen_set_framebuffer_state(struct > pipe_context *ctx, > surf = (struct r600_surface*)state->cbufs[i]; > rtex = (struct r600_texture*)surf->base.texture; > > + r600_context_add_resource_size(ctx, state->cbufs[i]->texture); > + > if (!surf->color_initialized) { > evergreen_init_color_surface(rctx, surf); > } > @@ -1699,6 +1701,8 @@ static void evergreen_set_framebuffer_state(struct > pipe_context *ctx, > if (state->zsbuf) { > surf = (struct r600_surface*)state->zsbuf; > > + r600_context_add_resource_size(ctx, 
state->zsbuf->texture); > + > if (!surf->depth_initialized) { > evergreen_init_depth_surface(rctx, surf); > } > diff --git a/src/gallium/drivers/r600/r600_hw_context.c > b/src/gallium/drivers/r600/r600_hw_context.c > index 23f488a..a89f230 100644 > --- a/src/gallium/drivers/r600/r600_hw_context.c > +++ b/src/gallium/drivers/r600/r600_hw_context.c > @@ -359,6 +359,16 @@ out_err: > void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, > boolean count_draw_in) > { > + if (!ctx->ws->cs_memory_below_limit(ctx->rings.gfx.cs, ctx->vram, > ctx->gtt)) { > + ctx->gtt = 0; > + ctx->vram = 0; > + ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC); > + return; > + } > + /* all will be accounted once relocation are emited */ > + ctx->gtt = 0; > + ctx->vram = 0; > + > /* The number of dwords we already used in the CS so far. */ > num_dw += ctx->rings.gfx.cs->cdw; > > @@ -784,6 +794,8 @@ void r600_begin_new_cs(struct r600_context *ctx) > > ctx->pm4_dirty_cdwords = 0; > ctx->flags = 0; > + ctx->gtt = 0; > + ctx->vram = 0; > > /* Begin a new CS. */ > r600_emit_command_buffer(ctx->rings.gfx.cs, &ctx->start_cs_cmd); > diff --git a/src/gallium/drivers/r600/r600_pipe.h > b/src/gallium/drivers/r600/r600_pipe.h > index 3ff42d3..42b4e7c 100644 > --- a/src/gallium/drivers/r600/r600_pipe.h > +++ b/src/gallium/drivers/r600/r600_pipe.h > @@ -447,6 +447,10 @@ struct r600_context { > unsigned backend_mask; > unsigned max_db; /* for OQ */ > > + /* current unaccounted memory usage */ > + uint64_t vram; > + uint64_t gtt; > + > /* Miscellaneous state objects. 
*/ > void *custom_dsa_flush; > void *custom_blend_resolve; > @@ -998,4 +1002,21 @@ static INLINE unsigned u_max_layer(struct pipe_resource > *r, unsigned level) > } > } > > +static INLINE void r600_context_add_resource_size(struct pipe_context *ctx, > struct pipe_resource *r) > +{ > + struct r600_context *rctx = (struct r600_context *)ctx; > + struct r600_resource *rr = (struct r600_resource *)r; > + > + if (r == NULL) { > + return; > + } > + > + if (rr->domains & RADEON_DOMAIN_GTT) { > + rctx->gtt += rr->buf->size; > + } > + if (rr->domains & RADEON_DOMAIN_VRAM) { > + rctx->vram += rr->buf->size; > + } > +} > + > #endif > diff --git a/src/gallium/drivers/r600/r600_state.c > b/src/gallium/drivers/r600/r600_state.c > index c0bc2a5..44cd00e 100644 > --- a/src/gallium/drivers/r600/r600_state.c > +++ b/src/gallium/drivers/r600/r600_state.c > @@ -1544,6 +1544,7 @@ static void r600_set_framebuffer_state(struct > pipe_context *ctx, > > surf = (struct r600_surface*)state->cbufs[i]; > rtex = (struct r600_texture*)surf->base.texture; > + r600_context_add_resource_size(ctx, state->cbufs[i]->texture); > > if (!surf->color_initialized || force_cmask_fmask) { > r600_init_color_surface(rctx, surf, > force_cmask_fmask); > @@ -1576,6 +1577,8 @@ static void r600_set_framebuffer_state(struct > pipe_context *ctx, > if (state->zsbuf) { > surf = (struct r600_surface*)state->zsbuf; > > + r600_context_add_resource_size(ctx, state->zsbuf->texture); > + > if (!surf->depth_initialized) { > r600_init_depth_surface(rctx, surf); > } > diff --git a/src/gallium/drivers/r600/r600_state_common.c > b/src/gallium/drivers/r600/r600_state_common.c > index 9386f61..33200a6 100644 > --- a/src/gallium/drivers/r600/r600_state_common.c > +++ b/src/gallium/drivers/r600/r600_state_common.c > @@ -479,7 +479,8 @@ static void r600_set_index_buffer(struct pipe_context > *ctx, > > if (ib) { > pipe_resource_reference(&rctx->index_buffer.buffer, > ib->buffer); > - memcpy(&rctx->index_buffer, ib, sizeof(*ib)); > + 
memcpy(&rctx->index_buffer, ib, sizeof(*ib)); > + r600_context_add_resource_size(ctx, ib->buffer); > } else { > pipe_resource_reference(&rctx->index_buffer.buffer, NULL); > } > @@ -516,6 +517,7 @@ static void r600_set_vertex_buffers(struct pipe_context > *ctx, > vb[i].buffer_offset = > input[i].buffer_offset; > > pipe_resource_reference(&vb[i].buffer, input[i].buffer); > new_buffer_mask |= 1 << i; > + r600_context_add_resource_size(ctx, > input[i].buffer); > } else { > > pipe_resource_reference(&vb[i].buffer, NULL); > disable_mask |= 1 << i; > @@ -613,6 +615,7 @@ static void r600_set_sampler_views(struct pipe_context > *pipe, unsigned shader, > > pipe_sampler_view_reference((struct pipe_sampler_view > **)&dst->views.views[i], views[i]); > new_mask |= 1 << i; > + r600_context_add_resource_size(pipe, > views[i]->texture); > } else { > pipe_sampler_view_reference((struct pipe_sampler_view > **)&dst->views.views[i], NULL); > disable_mask |= 1 << i; > @@ -806,6 +809,8 @@ static void r600_bind_ps_state(struct pipe_context *ctx, > void *state) > rctx->ps_shader = (struct r600_pipe_shader_selector *)state; > r600_context_pipe_state_set(rctx, &rctx->ps_shader->current->rstate); > > + r600_context_add_resource_size(ctx, (struct pipe_resource > *)rctx->ps_shader->current->bo); > + > if (rctx->chip_class <= R700) { > bool multiwrite = > rctx->ps_shader->current->shader.fs_write_all; > > @@ -835,6 +840,8 @@ static void r600_bind_vs_state(struct pipe_context *ctx, > void *state) > if (state) { > r600_context_pipe_state_set(rctx, > &rctx->vs_shader->current->rstate); > > + r600_context_add_resource_size(ctx, (struct pipe_resource > *)rctx->vs_shader->current->bo); > + > /* Update clip misc state. 
*/ > if (rctx->vs_shader->current->pa_cl_vs_out_cntl != > rctx->clip_misc_state.pa_cl_vs_out_cntl || > rctx->vs_shader->current->shader.clip_dist_write != > rctx->clip_misc_state.clip_dist_write) { > @@ -938,10 +945,13 @@ static void r600_set_constant_buffer(struct > pipe_context *ctx, uint shader, uint > } else { > u_upload_data(rctx->uploader, 0, input->buffer_size, > ptr, &cb->buffer_offset, &cb->buffer); > } > + /* account it in gtt */ > + rctx->gtt += input->buffer_size; > } else { > /* Setup the hw buffer. */ > cb->buffer_offset = input->buffer_offset; > pipe_resource_reference(&cb->buffer, input->buffer); > + r600_context_add_resource_size(ctx, input->buffer); > } > > state->enabled_mask |= 1 << index; > @@ -1004,6 +1014,7 @@ static void r600_set_so_targets(struct pipe_context > *ctx, > /* Set the new targets. */ > for (i = 0; i < num_targets; i++) { > pipe_so_target_reference((struct > pipe_stream_output_target**)&rctx->so_targets[i], targets[i]); > + r600_context_add_resource_size(ctx, targets[i]->buffer); > } > for (; i < rctx->num_so_targets; i++) { > pipe_so_target_reference((struct > pipe_stream_output_target**)&rctx->so_targets[i], NULL); > diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c > b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c > index cab2704..6a7115b 100644 > --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c > +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c > @@ -383,6 +383,16 @@ static boolean radeon_drm_cs_validate(struct > radeon_winsys_cs *rcs) > return status; > } > > +static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs > *rcs, uint64_t vram, uint64_t gtt) > +{ > + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); > + boolean status = > + (cs->csc->used_gart + gtt) < cs->ws->info.gart_size * 0.7 && > + (cs->csc->used_vram + vram) < cs->ws->info.vram_size * 0.7; > + > + return status; > +} > + > static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs, > struct radeon_winsys_cs_handle *buf) > 
{ > @@ -575,6 +585,7 @@ void radeon_drm_cs_init_functions(struct > radeon_drm_winsys *ws) > ws->base.cs_destroy = radeon_drm_cs_destroy; > ws->base.cs_add_reloc = radeon_drm_cs_add_reloc; > ws->base.cs_validate = radeon_drm_cs_validate; > + ws->base.cs_memory_below_limit = radeon_drm_cs_memory_below_limit; > ws->base.cs_write_reloc = radeon_drm_cs_write_reloc; > ws->base.cs_flush = radeon_drm_cs_flush; > ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush; > diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h > b/src/gallium/winsys/radeon/drm/radeon_winsys.h > index 7fdef3f..8b64ef2 100644 > --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h > +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h > @@ -393,6 +393,16 @@ struct radeon_winsys { > boolean (*cs_validate)(struct radeon_winsys_cs *cs); > > /** > + * Return TRUE if there is enough memory in VRAM and GTT for the relocs > + * added so far. > + * > + * \param cs A command stream to validate. > + * \param vram VRAM memory size pending to be use > + * \param gtt GTT memory size pending to be use > + */ > + boolean (*cs_memory_below_limit)(struct radeon_winsys_cs *cs, uint64_t > vram, uint64_t gtt); > + > + /** > * Write a relocated dword to a command buffer. > * > * \param cs A command stream the relocation is written to. > -- > 1.7.11.7 > > _______________________________________________ > mesa-dev mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
