From: Marek Olšák <marek.ol...@amd.com> Invalidated buffers don't have to go through the resource field initialization again; they only need a new buffer allocation.
Split r600_init_resource into r600_init_resource_fields and r600_alloc_resource. --- src/gallium/drivers/r600/r600_state_common.c | 5 +- src/gallium/drivers/radeon/r600_buffer_common.c | 84 ++++++++++++++----------- src/gallium/drivers/radeon/r600_pipe_common.h | 13 ++-- src/gallium/drivers/radeon/r600_texture.c | 9 +-- src/gallium/drivers/radeonsi/si_descriptors.c | 5 +- 5 files changed, 67 insertions(+), 49 deletions(-) diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index a5341c3..0349432 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -2774,26 +2774,25 @@ uint32_t r600_colorformat_endian_swap(uint32_t colorformat, bool do_endian_swap) } } else { return ENDIAN_NONE; } } static void r600_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf) { struct r600_context *rctx = (struct r600_context*)ctx; struct r600_resource *rbuffer = r600_resource(buf); - unsigned i, shader, mask, alignment = rbuffer->buf->alignment; + unsigned i, shader, mask; struct r600_pipe_sampler_view *view; /* Reallocate the buffer in the same pipe_resource. */ - r600_init_resource(&rctx->screen->b, rbuffer, rbuffer->b.b.width0, - alignment); + r600_alloc_resource(&rctx->screen->b, rbuffer); /* We changed the buffer, now we need to bind it where the old one was bound. */ /* Vertex buffers. 
*/ mask = rctx->vertex_buffer_state.enabled_mask; while (mask) { i = u_bit_scan(&mask); if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) { rctx->vertex_buffer_state.dirty_mask |= 1 << i; r600_vertex_buffers_dirty(rctx); } diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index 4480293..6a55de1 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -92,97 +92,118 @@ void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx, ctx->ws->cs_sync_flush(ctx->gfx.cs); if (ctx->dma.cs) ctx->ws->cs_sync_flush(ctx->dma.cs); } } /* Setting the CS to NULL will prevent doing checks we have done already. */ return ctx->ws->buffer_map(resource->buf, NULL, usage); } -bool r600_init_resource(struct r600_common_screen *rscreen, - struct r600_resource *res, - uint64_t size, unsigned alignment) +void r600_init_resource_fields(struct r600_common_screen *rscreen, + struct r600_resource *res, + uint64_t size, unsigned alignment) { struct r600_texture *rtex = (struct r600_texture*)res; - struct pb_buffer *old_buf, *new_buf; - enum radeon_bo_flag flags = 0; + + res->bo_size = size; + res->bo_alignment = alignment; + res->flags = 0; switch (res->b.b.usage) { case PIPE_USAGE_STREAM: - flags = RADEON_FLAG_GTT_WC; + res->flags = RADEON_FLAG_GTT_WC; /* fall through */ case PIPE_USAGE_STAGING: - /* Transfers are likely to occur more often with these resources. */ + /* Transfers are likely to occur more often with these + * resources. 
*/ res->domains = RADEON_DOMAIN_GTT; break; case PIPE_USAGE_DYNAMIC: /* Older kernels didn't always flush the HDP cache before * CS execution */ if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 40) { res->domains = RADEON_DOMAIN_GTT; - flags |= RADEON_FLAG_GTT_WC; + res->flags |= RADEON_FLAG_GTT_WC; break; } - flags |= RADEON_FLAG_CPU_ACCESS; + res->flags |= RADEON_FLAG_CPU_ACCESS; /* fall through */ case PIPE_USAGE_DEFAULT: case PIPE_USAGE_IMMUTABLE: default: - /* Not listing GTT here improves performance in some apps. */ + /* Not listing GTT here improves performance in some + * apps. */ res->domains = RADEON_DOMAIN_VRAM; - flags |= RADEON_FLAG_GTT_WC; + res->flags |= RADEON_FLAG_GTT_WC; break; } if (res->b.b.target == PIPE_BUFFER && res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | PIPE_RESOURCE_FLAG_MAP_COHERENT)) { - /* Use GTT for all persistent mappings with older kernels, - * because they didn't always flush the HDP cache before CS - * execution. + /* Use GTT for all persistent mappings with older + * kernels, because they didn't always flush the HDP + * cache before CS execution. * - * Write-combined CPU mappings are fine, the kernel ensures all CPU - * writes finish before the GPU executes a command stream. + * Write-combined CPU mappings are fine, the kernel + * ensures all CPU writes finish before the GPU + * executes a command stream. */ if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 40) res->domains = RADEON_DOMAIN_GTT; else if (res->domains & RADEON_DOMAIN_VRAM) - flags |= RADEON_FLAG_CPU_ACCESS; + res->flags |= RADEON_FLAG_CPU_ACCESS; } /* Tiled textures are unmappable. Always put them in VRAM. 
*/ if (res->b.b.target != PIPE_BUFFER && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) { res->domains = RADEON_DOMAIN_VRAM; - flags &= ~RADEON_FLAG_CPU_ACCESS; - flags |= RADEON_FLAG_NO_CPU_ACCESS | + res->flags &= ~RADEON_FLAG_CPU_ACCESS; + res->flags |= RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC; } - /* If VRAM is just stolen system memory, allow both VRAM and GTT, - * whichever has free space. If a buffer is evicted from VRAM to GTT, - * it will stay there. + /* If VRAM is just stolen system memory, allow both VRAM and + * GTT, whichever has free space. If a buffer is evicted from + * VRAM to GTT, it will stay there. */ if (!rscreen->info.has_dedicated_vram && res->domains == RADEON_DOMAIN_VRAM) res->domains = RADEON_DOMAIN_VRAM_GTT; if (rscreen->debug_flags & DBG_NO_WC) - flags &= ~RADEON_FLAG_GTT_WC; + res->flags &= ~RADEON_FLAG_GTT_WC; + + /* Set expected VRAM and GART usage for the buffer. */ + res->vram_usage = 0; + res->gart_usage = 0; + + if (res->domains & RADEON_DOMAIN_VRAM) + res->vram_usage = size; + else if (res->domains & RADEON_DOMAIN_GTT) + res->gart_usage = size; +} + +bool r600_alloc_resource(struct r600_common_screen *rscreen, + struct r600_resource *res) +{ + struct pb_buffer *old_buf, *new_buf; /* Allocate a new resource. */ - new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment, - res->domains, flags); + new_buf = rscreen->ws->buffer_create(rscreen->ws, res->bo_size, + res->bo_alignment, + res->domains, res->flags); if (!new_buf) { return false; } /* Replace the pointer such that if res->buf wasn't NULL, it won't be * NULL. This should prevent crashes with multiple contexts using * the same buffer where one of the contexts invalidates it while * the others are using it. 
*/ old_buf = res->buf; res->buf = new_buf; /* should be atomic */ @@ -190,29 +211,20 @@ bool r600_init_resource(struct r600_common_screen *rscreen, if (rscreen->info.has_virtual_memory) res->gpu_address = rscreen->ws->buffer_get_virtual_address(res->buf); else res->gpu_address = 0; pb_reference(&old_buf, NULL); util_range_set_empty(&res->valid_buffer_range); res->TC_L2_dirty = false; - /* Set expected VRAM and GART usage for the buffer. */ - res->vram_usage = 0; - res->gart_usage = 0; - - if (res->domains & RADEON_DOMAIN_VRAM) - res->vram_usage = size; - else if (res->domains & RADEON_DOMAIN_GTT) - res->gart_usage = size; - /* Print debug information. */ if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) { fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %"PRIu64" bytes\n", res->gpu_address, res->gpu_address + res->buf->size, res->buf->size); } return true; } static void r600_buffer_destroy(struct pipe_screen *screen, @@ -509,21 +521,23 @@ r600_alloc_buffer_struct(struct pipe_screen *screen, return rbuffer; } struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ, unsigned alignment) { struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ); - if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment)) { + r600_init_resource_fields(rscreen, rbuffer, templ->width0, alignment); + + if (!r600_alloc_resource(rscreen, rbuffer)) { FREE(rbuffer); return NULL; } return &rbuffer->b.b; } struct pipe_resource *r600_aligned_buffer_create(struct pipe_screen *screen, unsigned bind, unsigned usage, unsigned size, diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 5375044..1924535 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -167,22 +167,25 @@ void 
radeon_shader_binary_clean(struct radeon_shader_binary *b); struct r600_resource { struct u_resource b; /* Winsys objects. */ struct pb_buffer *buf; uint64_t gpu_address; /* Memory usage if the buffer placement is optimal. */ uint64_t vram_usage; uint64_t gart_usage; - /* Resource state. */ + /* Resource properties. */ + uint64_t bo_size; + unsigned bo_alignment; enum radeon_bo_domain domains; + enum radeon_bo_flag flags; /* The buffer range which is initialized (with a write transfer, * streamout, DMA, or as a random access target). The rest of * the buffer is considered invalid and can be mapped unsynchronized. * * This allows unsychronized mapping of a buffer range which hasn't * been used yet. It's for applications which forget to use * the unsynchronized map flag and expect the driver to figure it out. */ struct util_range valid_buffer_range; @@ -646,23 +649,25 @@ struct r600_common_context { bool r600_rings_is_buffer_referenced(struct r600_common_context *ctx, struct pb_buffer *buf, enum radeon_bo_usage usage); void *r600_buffer_map_sync_with_rings(struct r600_common_context *ctx, struct r600_resource *resource, unsigned usage); void r600_buffer_subdata(struct pipe_context *ctx, struct pipe_resource *buffer, unsigned usage, unsigned offset, unsigned size, const void *data); -bool r600_init_resource(struct r600_common_screen *rscreen, - struct r600_resource *res, - uint64_t size, unsigned alignment); +void r600_init_resource_fields(struct r600_common_screen *rscreen, + struct r600_resource *res, + uint64_t size, unsigned alignment); +bool r600_alloc_resource(struct r600_common_screen *rscreen, + struct r600_resource *res); struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ, unsigned alignment); struct pipe_resource * r600_aligned_buffer_create(struct pipe_screen *screen, unsigned bind, unsigned usage, unsigned size, unsigned alignment); struct pipe_resource * r600_buffer_from_user_memory(struct pipe_screen 
*screen, diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index e19150b..fb3068a 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -1096,22 +1096,24 @@ r600_texture_create_object(struct pipe_screen *screen, (buf || !(rscreen->debug_flags & DBG_NO_DCC)) && !(rtex->surface.flags & RADEON_SURF_SCANOUT)) { /* Reserve space for the DCC buffer. */ rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment); rtex->size = rtex->dcc_offset + rtex->surface.dcc_size; } } /* Now create the backing buffer. */ if (!buf) { - if (!r600_init_resource(rscreen, resource, rtex->size, - rtex->surface.bo_alignment)) { + r600_init_resource_fields(rscreen, resource, rtex->size, + rtex->surface.bo_alignment); + + if (!r600_alloc_resource(rscreen, resource)) { FREE(rtex); return NULL; } } else { resource->buf = buf; resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf); resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf); } if (rtex->cmask.size) { @@ -1411,22 +1413,21 @@ static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen, static void r600_texture_invalidate_storage(struct r600_common_context *rctx, struct r600_texture *rtex) { struct r600_common_screen *rscreen = rctx->screen; /* There is no point in discarding depth and tiled buffers. */ assert(!rtex->is_depth); assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED); /* Reallocate the buffer in the same pipe_resource. */ - r600_init_resource(rscreen, &rtex->resource, rtex->size, - rtex->surface.bo_alignment); + r600_alloc_resource(rscreen, &rtex->resource); /* Initialize the CMASK base address (needed even without CMASK). 
*/ rtex->cmask.base_address_reg = (rtex->resource.gpu_address + rtex->cmask.offset) >> 8; r600_dirty_all_framebuffer_states(rscreen); p_atomic_inc(&rscreen->dirty_tex_descriptor_counter); rctx->num_alloc_tex_transfer_bytes += rtex->size; } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 0e026e9..b3174c6 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1416,29 +1416,28 @@ static void si_reset_buffer_resources(struct si_context *sctx, * bound. * * This is used to avoid CPU-GPU synchronizations, because it makes the buffer * idle by discarding its contents. Apps usually tell us when to do this using * map_buffer flags, for example. */ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf) { struct si_context *sctx = (struct si_context*)ctx; struct r600_resource *rbuffer = r600_resource(buf); - unsigned i, shader, alignment = rbuffer->buf->alignment; + unsigned i, shader; uint64_t old_va = rbuffer->gpu_address; unsigned num_elems = sctx->vertex_elements ? sctx->vertex_elements->count : 0; struct si_sampler_view *view; /* Reallocate the buffer in the same pipe_resource. */ - r600_init_resource(&sctx->screen->b, rbuffer, rbuffer->b.b.width0, - alignment); + r600_alloc_resource(&sctx->screen->b, rbuffer); /* We changed the buffer, now we need to bind it where the old one * was bound. This consists of 2 things: * 1) Updating the resource descriptor and dirtying it. * 2) Adding a relocation to the CS, so that it's usable. */ /* Vertex buffers. */ for (i = 0; i < num_elems; i++) { int vb = sctx->vertex_elements->elements[i].vertex_buffer_index; -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev