On Thu, Oct 22, 2015 at 12:12 PM, Marek Olšák <mar...@gmail.com> wrote: > On Wed, Oct 21, 2015 at 12:10 AM, Bas Nieuwenhuizen > <b...@basnieuwenhuizen.nl> wrote: >> Uses the DCC buffer instead of the CMASK buffer. The ELIMINATE_FAST_CLEAR >> still works. Furthermore, with DCC compression we can directly clear >> to a limited set of colors such that we do not need a postprocessing step. >> >> Signed-off-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> >> --- >> src/gallium/drivers/radeon/r600_texture.c | 105 >> +++++++++++++++++++++++--- >> src/gallium/drivers/radeonsi/si_blit.c | 4 +- >> src/gallium/drivers/radeonsi/si_descriptors.c | 2 +- >> 3 files changed, 97 insertions(+), 14 deletions(-) >> >> diff --git a/src/gallium/drivers/radeon/r600_texture.c >> b/src/gallium/drivers/radeon/r600_texture.c >> index 0314049..4391665 100644 >> --- a/src/gallium/drivers/radeon/r600_texture.c >> +++ b/src/gallium/drivers/radeon/r600_texture.c >> @@ -1239,6 +1239,79 @@ static void evergreen_set_clear_color(struct >> r600_texture *rtex, >> memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t)); >> } >> >> +static void vi_get_fast_clear_parameters(enum pipe_format surface_format, >> + const union pipe_color_union *color, >> + uint32_t* reset_value, >> + bool* clear_words_needed) >> +{ >> + bool values[4] = {}; >> + bool main_value = false; >> + int i; >> + int extra_channel; >> + int extra_component = 0; >> + const struct util_format_description *desc = >> util_format_description(surface_format); >> + >> + *clear_words_needed = true; >> + *reset_value = 0x20202020U; >> + >> + /* If we want to clear without needing a fast clear eliminate step, >> we can set each channel to >> + * 0 or 1 (or 0/max for integer formats). We have two sets of flags, >> one for the last or first >> + * channel and one for the rest. We decide on the last or first >> channel by r600_translate_colorswap. 
>> + * >> + * Note that in formats as R8G8B8X*, the X8 is the last channel, so >> the last channel may not correspond >> + * to the last enabled component. >> + */ >> + >> + /* Not sure if it is a coincidence that these are all the 3-channel >> color formats. */ >> + if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT || >> + surface_format == PIPE_FORMAT_B5G6R5_UNORM || >> + surface_format == PIPE_FORMAT_B5G6R5_SRGB) { >> + extra_channel = -1; >> + } else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { >> + extra_channel = (r600_translate_colorswap(surface_format) <= >> 1) ? desc->nr_channels - 1 : 0; >> + } else >> + return; >> + >> + for (i = 0; i < 4; ++i) { >> + int index = desc->swizzle[i] - UTIL_FORMAT_SWIZZLE_X; >> + >> + if (desc->swizzle[i] < UTIL_FORMAT_SWIZZLE_X || >> desc->swizzle[i] > UTIL_FORMAT_SWIZZLE_W) >> + continue; >> + >> + >> + if (util_format_is_pure_sint(surface_format)) { >> + values[i] = color->i[i] != 0; >> + if (color->i[i] != 0 && color->i[i] != (1ULL << >> (desc->channel[index].size - 1)) - 1) >> + return; >> + } else if (util_format_is_pure_uint(surface_format)) { >> + values[i] = color->ui[i] != 0U; >> + if (color->ui[i] != 0U && color->ui[i] != (1ULL << >> desc->channel[index].size) - 1) >> + return; >> + } else { >> + values[i] = color->f[i] != 0.0F; >> + if (color->f[i] != 0.0F && color->f[i] != 1.0F) >> + return; >> + } >> + >> + if (index == extra_channel) >> + extra_component = i; >> + else >> + main_value = values[i]; >> + } >> + >> + for (int i = 0; i < 4; ++i) >> + if (values[i] != main_value && desc->swizzle[i] - >> UTIL_FORMAT_SWIZZLE_X != extra_channel && >> + desc->swizzle[i] >= UTIL_FORMAT_SWIZZLE_X && >> desc->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W) >> + return; >> + >> + *clear_words_needed = false; >> + if (main_value) >> + *reset_value |= 0x80808080U; >> + >> + if (values[extra_component]) >> + *reset_value |= 0x40404040U; > > Could you please reformat this function and rename things to be more > readable? 
"main" is color and "extra" is alpha, right? If yes, they > should be called color and alpha. Also, 80 characters per line where > possible.
"Extra" is not necessarily alpha; e.g., for R8G8 it is actually the green component. > >> +} >> + >> void evergreen_do_fast_color_clear(struct r600_common_context *rctx, >> struct pipe_framebuffer_state *fb, >> struct r600_atom *fb_state, >> @@ -1292,23 +1365,33 @@ void evergreen_do_fast_color_clear(struct >> r600_common_context *rctx, >> continue; >> } >> >> - /* CMASK clear does not work for DCC compressed textures */ >> if (tex->surface.dcc_enabled) { >> - continue; >> - } >> + uint32_t reset_value; >> + bool clear_words_needed; >> >> - /* ensure CMASK is enabled */ >> - r600_texture_alloc_cmask_separate(rctx->screen, tex); >> - if (tex->cmask.size == 0) { >> - continue; >> + vi_get_fast_clear_parameters(fb->cbufs[i]->format, >> color, &reset_value, &clear_words_needed); >> + >> + rctx->clear_buffer(&rctx->b, &tex->dcc_buffer->b.b, >> + 0, tex->surface.dcc_size, >> reset_value, true); >> + >> + if (clear_words_needed) >> + tex->dirty_level_mask |= 1 << >> fb->cbufs[i]->u.tex.level; >> + } else { >> + /* ensure CMASK is enabled */ >> + r600_texture_alloc_cmask_separate(rctx->screen, tex); >> + if (tex->cmask.size == 0) { >> + continue; >> + } >> + >> + /* Do the fast clear. */ >> + rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b, >> + tex->cmask.offset, tex->cmask.size, >> 0, true); >> + >> + tex->dirty_level_mask |= 1 << >> fb->cbufs[i]->u.tex.level; >> } >> >> - /* Do the fast clear. 
*/ >> evergreen_set_clear_color(tex, fb->cbufs[i]->format, color); >> - rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b, >> - tex->cmask.offset, tex->cmask.size, 0, >> true); >> >> - tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; >> if (dirty_cbufs) >> *dirty_cbufs |= 1 << i; >> rctx->set_atom_dirty(rctx, fb_state, true); >> diff --git a/src/gallium/drivers/radeonsi/si_blit.c >> b/src/gallium/drivers/radeonsi/si_blit.c >> index a1af4f8..06e7e66 100644 >> --- a/src/gallium/drivers/radeonsi/si_blit.c >> +++ b/src/gallium/drivers/radeonsi/si_blit.c >> @@ -326,7 +326,7 @@ void si_decompress_color_textures(struct si_context >> *sctx, >> assert(view); >> >> tex = (struct r600_texture *)view->texture; >> - assert(tex->cmask.size || tex->fmask.size); >> + assert(tex->cmask.size || tex->fmask.size || >> tex->surface.dcc_enabled); >> >> si_blit_decompress_color(&sctx->b.b, tex, >> view->u.tex.first_level, >> view->u.tex.last_level, >> @@ -455,7 +455,7 @@ static void si_decompress_subresource(struct >> pipe_context *ctx, >> si_blit_decompress_depth_in_place(sctx, rtex, true, >> level, level, >> first_layer, >> last_layer); >> - } else if (rtex->fmask.size || rtex->cmask.size) { >> + } else if (rtex->fmask.size || rtex->cmask.size || >> rtex->surface.dcc_enabled) { >> si_blit_decompress_color(ctx, rtex, level, level, >> first_layer, last_layer); >> } >> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c >> b/src/gallium/drivers/radeonsi/si_descriptors.c >> index 5548cba3..a277fa5 100644 >> --- a/src/gallium/drivers/radeonsi/si_descriptors.c >> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c >> @@ -234,7 +234,7 @@ static void si_set_sampler_views(struct pipe_context >> *ctx, >> } else { >> samplers->depth_texture_mask &= ~(1 << slot); >> } >> - if (rtex->cmask.size || rtex->fmask.size) { >> + if (rtex->cmask.size || rtex->fmask.size || >> rtex->surface.dcc_enabled) { >> samplers->compressed_colortex_mask |= 1 << >> slot; > > I'd like this flag to be 
set only when dirty_level_mask is non-zero. > Setting this for all textures that have DCC is quite expensive in draw > calls. _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev