On 12.10.2016 15:54, Marek Olšák wrote:
From: Marek Olšák
This enables TC-compatible HTILE so that decompress blits aren't needed and
depth texturing needs less memory bandwidth.
Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible
HTILE only supports Z32_FLOAT. This doubles memory footprint for Z16.
The format promotion is not visible to state trackers.
This is part of TC-compatible renderbuffer compression, which has 3 parts:
DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now.
I don't see a measurable increase in performance, though.
(I tested Talos Principle and DiRT: Showdown; the latter is improved by
0.5%, which is almost noise. It originally used layered Z16, so at least
we know that Z16 promoted to Z32F isn't slower now.)
Reviewed-by: Nicolai Hähnle
---
src/gallium/drivers/radeon/r600_pipe_common.h | 3 ++
src/gallium/drivers/radeon/r600_texture.c | 67 ++
src/gallium/drivers/radeon/radeon_winsys.h | 4 ++
src/gallium/drivers/radeonsi/si_blit.c | 11 -
src/gallium/drivers/radeonsi/si_descriptors.c | 7 ++-
src/gallium/drivers/radeonsi/si_shader.c | 18 ++-
src/gallium/drivers/radeonsi/si_state.c | 39 +--
src/gallium/drivers/radeonsi/si_state_draw.c | 3 +-
src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 57 --
9 files changed, 185 insertions(+), 24 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 290b228..5cfcad6 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -238,27 +238,29 @@ struct r600_cmask_info {
unsigned yalign;
unsigned slice_tile_max;
unsigned base_address_reg;
};
struct r600_htile_info {
unsigned pitch;
unsigned height;
unsigned xalign;
unsigned yalign;
+ unsigned alignment;
};
struct r600_texture {
struct r600_resource resource;
uint64_t size;
unsigned num_level0_transfers;
+ enum pipe_format db_render_format;
bool is_depth;
bool db_compatible;
bool can_sample_z;
bool can_sample_s;
unsigned dirty_level_mask; /* each bit says if that mipmap is compressed */
unsigned stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
struct r600_texture *flushed_depth_texture;
struct radeon_surf surface;
/* Colorbuffer compression and fast clear. */
@@ -266,20 +268,21 @@ struct r600_texture {
struct r600_cmask_info cmask;
struct r600_resource *cmask_buffer;
uint64_t dcc_offset; /* 0 = disabled */
unsigned cb_color_info; /* fast clear enable bit */
unsigned color_clear_value[2];
unsigned last_msaa_resolve_target_micro_mode;
/* Depth buffer compression and fast clear. */
struct r600_htile_info htile;
struct r600_resource *htile_buffer;
+ bool tc_compatible_htile;
bool depth_cleared; /* if it was cleared at least once */
float depth_clear_value;
bool stencil_cleared; /* if it was cleared at least once */
uint8_t stencil_clear_value;
bool non_disp_tiling; /* R600-Cayman only */
/* Whether the texture is a displayable back buffer and needs DCC
* decompression, which is expensive. Therefore, it's enabled only
* if statistics suggest that it will pay off and it's allocated
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 57cdbcf..625d091 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -185,21 +185,22 @@ static unsigned r600_texture_get_offset(struct
r600_texture *rtex, unsigned leve
return rtex->surface.level[level].offset +
box->z * rtex->surface.level[level].slice_size +
box->y / util_format_get_blockheight(format) *
rtex->surface.level[level].pitch_bytes +
box->x / util_format_get_blockwidth(format) *
util_format_get_blocksize(format);
}
static int r600_init_surface(struct r600_common_screen *rscreen,
struct radeon_surf *surface,
const struct pipe_resource *ptex,