Am 15.03.2018 07:56, schrieb Dieter Nützel:
For the series (1-3)

Tested-by: Dieter Nützel <die...@nuetzel-hh.de>

Are these numbers OK?

Triangle,Radeon RX 580 Series (POLARIS10 / DRM 3.25.0 /
4.16.0-rc1-1.g7262353-default+, LLVM 7.0.0),3.1 Mesa 18.1.0-devel
(git-a8cc051d2e),1920,1080,YES,Off,5240,60000,0,314471

Or should I retest without this series?

WAIT!!!

Marek, I've retested back and forth with this series removed, and 'GpuTest Triangle' and most other tests in 'GpuTest' are ~10% _slower_ with _this_ series applied!

I've even booted with 'nopti, nospectre_v2' because my 'old' X3470 Nehalem has NO 'PCID' and is therefore much regressed with the mitigation (10-20% with a high CS count; kernel compilation went from 7:30 to 8:30).

With mitigation:
My best run
_with_ this series was:       318668
_without_ this series was:  350645

'nopti, nospectre_v2'
_with_ this series was:       319327
_without_ this series was:  354074

System specs:
Xeon X3470 Nehalem, 2.93 GHz, 3.6 GHz max turbo, 4c/8t, 24 GB, RX580 8 GB
cpupower frequency-set -g performance

  current CPU frequency: 2.93 GHz (asserted by call to hardware)
  boost state support:
    Supported: yes
    Active: yes
    3200 MHz max turbo 4 active cores
    3200 MHz max turbo 3 active cores
    3467 MHz max turbo 2 active cores
    3600 MHz max turbo 1 active cores

Hope this helps.

Dieter

Am 11.03.2018 19:11, schrieb Marek Olšák:
From: Marek Olšák <marek.ol...@amd.com>

This should improve the score for the GpuTest Triangle benchmark.
Vulkan doesn't use this either.
---
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 -
 src/gallium/drivers/radeon/r600_texture.c     | 11 +-------
src/gallium/drivers/radeonsi/si_clear.c | 37 ++-------------------------
 src/gallium/drivers/radeonsi/si_state.c       |  6 -----
 4 files changed, 3 insertions(+), 52 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 7941903..9701757 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -209,21 +209,20 @@ struct r600_cmask_info {
 struct r600_texture {
        struct r600_resource            resource;

        struct radeon_surf              surface;
        uint64_t                        size;
        struct r600_texture             *flushed_depth_texture;

        /* Colorbuffer compression and fast clear. */
        struct r600_fmask_info          fmask;
        struct r600_cmask_info          cmask;
-       struct r600_resource            *cmask_buffer;
        uint64_t                        dcc_offset; /* 0 = disabled */
        unsigned                        cb_color_info; /* fast clear enable bit 
*/
        unsigned                        color_clear_value[2];
        unsigned                        last_msaa_resolve_target_micro_mode;
        unsigned                        num_level0_transfers;

        /* Depth buffer compression and fast clear. */
        uint64_t                        htile_offset;
        float                           depth_clear_value;
uint16_t dirty_level_mask; /* each bit says if that mipmap is compressed */
diff --git a/src/gallium/drivers/radeon/r600_texture.c
b/src/gallium/drivers/radeon/r600_texture.c
index 125e7ef..03bc955 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -405,26 +405,22 @@ void si_texture_discard_cmask(struct si_screen *sscreen,
 {
        if (!rtex->cmask.size)
                return;

        assert(rtex->resource.b.b.nr_samples <= 1);

        /* Disable CMASK. */
        memset(&rtex->cmask, 0, sizeof(rtex->cmask));
        rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
        rtex->dirty_level_mask = 0;
-
        rtex->cb_color_info &= ~S_028C70_FAST_CLEAR(1);

-       if (rtex->cmask_buffer != &rtex->resource)
-           r600_resource_reference(&rtex->cmask_buffer, NULL);
-
        /* Notify all contexts about the change. */
        p_atomic_inc(&sscreen->dirty_tex_counter);
        p_atomic_inc(&sscreen->compressed_colortex_counter);
 }

 static bool r600_can_disable_dcc(struct r600_texture *rtex)
 {
        /* We can't disable DCC if it can be written by another process. */
        return rtex->dcc_offset &&
               (!rtex->resource.b.is_shared ||
@@ -813,24 +809,20 @@ static boolean r600_texture_get_handle(struct
pipe_screen* screen,
                                              slice_size, whandle);
 }

 static void r600_texture_destroy(struct pipe_screen *screen,
                                 struct pipe_resource *ptex)
 {
        struct r600_texture *rtex = (struct r600_texture*)ptex;
        struct r600_resource *resource = &rtex->resource;

        r600_texture_reference(&rtex->flushed_depth_texture, NULL);
-
-       if (rtex->cmask_buffer != &rtex->resource) {
-           r600_resource_reference(&rtex->cmask_buffer, NULL);
-       }
        pb_reference(&resource->buf, NULL);
        r600_resource_reference(&rtex->dcc_separate_buffer, NULL);
        r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL);
        FREE(rtex);
 }

 static const struct u_resource_vtbl r600_texture_vtbl;

/* The number of samples can be specified independently of the texture. */
 void si_texture_get_fmask_info(struct si_screen *sscreen,
@@ -1262,21 +1254,20 @@ r600_texture_create_object(struct pipe_screen *screen,
                        rtex->db_compatible = true;

                        if (!(sscreen->debug_flags & DBG(NO_HYPERZ)))
                                r600_texture_allocate_htile(sscreen, rtex);
                }
        } else {
                if (base->nr_samples > 1) {
                        if (!buf) {
                                r600_texture_allocate_fmask(sscreen, rtex);
                                r600_texture_allocate_cmask(sscreen, rtex);
-                               rtex->cmask_buffer = &rtex->resource;
                        }
                        if (!rtex->fmask.size || !rtex->cmask.size) {
                                FREE(rtex);
                                return NULL;
                        }
                }

                /* Shared textures must always set up DCC here.
                 * If it's not present, it will be disabled by
                 * apply_opaque_metadata later.
@@ -1306,21 +1297,21 @@ r600_texture_create_object(struct pipe_screen *screen,
                resource->bo_alignment = buf->alignment;
resource->domains = sscreen->ws->buffer_get_initial_domain(resource->buf);
                if (resource->domains & RADEON_DOMAIN_VRAM)
                        resource->vram_usage = buf->size;
                else if (resource->domains & RADEON_DOMAIN_GTT)
                        resource->gart_usage = buf->size;
        }

        if (rtex->cmask.size) {
                /* Initialize the cmask to 0xCC (= compressed state). */
-               si_screen_clear_buffer(sscreen, &rtex->cmask_buffer->b.b,
+               si_screen_clear_buffer(sscreen, &rtex->resource.b.b,
                                         rtex->cmask.offset, rtex->cmask.size,
                                         0xCCCCCCCC);
        }
        if (rtex->htile_offset) {
                uint32_t clear_value = 0;

                if (sscreen->info.chip_class >= GFX9 || 
rtex->tc_compatible_htile)
                        clear_value = 0x0000030F;

                si_screen_clear_buffer(sscreen, &rtex->resource.b.b,
diff --git a/src/gallium/drivers/radeonsi/si_clear.c
b/src/gallium/drivers/radeonsi/si_clear.c
index 464b9d7..a940aea 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -26,51 +26,20 @@

 #include "util/u_format.h"
 #include "util/u_pack_color.h"
 #include "util/u_surface.h"

 enum {
        SI_CLEAR         = SI_SAVE_FRAGMENT_STATE,
        SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,
 };

-static void si_alloc_separate_cmask(struct si_screen *sscreen,
-                                   struct r600_texture *rtex)
-{
-       if (rtex->cmask_buffer)
-                return;
-
-       assert(rtex->cmask.size == 0);
-
-       si_texture_get_cmask_info(sscreen, rtex, &rtex->cmask);
-       if (!rtex->cmask.size)
-               return;
-
-       rtex->cmask_buffer = (struct r600_resource *)
-               si_aligned_buffer_create(&sscreen->b,
-                                        R600_RESOURCE_FLAG_UNMAPPABLE,
-                                        PIPE_USAGE_DEFAULT,
-                                        rtex->cmask.size,
-                                        rtex->cmask.alignment);
-       if (rtex->cmask_buffer == NULL) {
-               rtex->cmask.size = 0;
-               return;
-       }
-
-       /* update colorbuffer state bits */
-       rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
-
-       rtex->cb_color_info |= S_028C70_FAST_CLEAR(1);
-
-       p_atomic_inc(&sscreen->compressed_colortex_counter);
-}
-
 static void si_set_clear_color(struct r600_texture *rtex,
                               enum pipe_format surface_format,
                               const union pipe_color_union *color)
 {
        union util_color uc;

        memset(&uc, 0, sizeof(uc));

        if (rtex->surface.bpe == 16) {
                /* DCC fast clear only:
@@ -451,21 +420,21 @@ static void si_do_fast_color_clear(struct
si_context *sctx,

                        if (clear_words_needed && too_small)
                                continue;

                        /* DCC fast clear with MSAA should clear CMASK to 0xC. 
*/
                        if (tex->resource.b.b.nr_samples >= 2 && 
tex->cmask.size) {
                                /* TODO: This doesn't work with MSAA. */
                                if (clear_words_needed)
                                        continue;

-                               si_clear_buffer(&sctx->b.b, 
&tex->cmask_buffer->b.b,
+                               si_clear_buffer(&sctx->b.b, &tex->resource.b.b,
                                                tex->cmask.offset, 
tex->cmask.size,
                                                0xCCCCCCCC, 
R600_COHERENCY_CB_META);
                                need_decompress_pass = true;
                        }

                        vi_dcc_clear_level(sctx, tex, 0, reset_value);

                        if (clear_words_needed)
                                need_decompress_pass = true;

@@ -476,28 +445,26 @@ static void si_do_fast_color_clear(struct
si_context *sctx,

                        /* 128-bit formats are unusupported */
                        if (tex->surface.bpe > 8) {
                                continue;
                        }

                        /* RB+ doesn't work with CMASK fast clear on Stoney. */
                        if (sctx->b.family == CHIP_STONEY)
                                continue;

-                       /* ensure CMASK is enabled */
-                       si_alloc_separate_cmask(sctx->screen, tex);
                        if (tex->cmask.size == 0) {
                                continue;
                        }

                        /* Do the fast clear. */
-                       si_clear_buffer(&sctx->b.b, &tex->cmask_buffer->b.b,
+                       si_clear_buffer(&sctx->b.b, &tex->resource.b.b,
                                        tex->cmask.offset, tex->cmask.size, 0,
                                        R600_COHERENCY_CB_META);
                        need_decompress_pass = true;
                }

                if (need_decompress_pass &&
                    !(tex->dirty_level_mask & (1 << level))) {
                        tex->dirty_level_mask |= 1 << level;
                        
p_atomic_inc(&sctx->screen->compressed_colortex_counter);
                }
diff --git a/src/gallium/drivers/radeonsi/si_state.c
b/src/gallium/drivers/radeonsi/si_state.c
index 6c82257..aae7332 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2980,26 +2980,20 @@ static void si_emit_framebuffer_state(struct
si_context *sctx, struct r600_atom
                        continue;
                }

                tex = (struct r600_texture *)cb->base.texture;
                radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
                                      &tex->resource, RADEON_USAGE_READWRITE,
                                      tex->resource.b.b.nr_samples > 1 ?
                                              RADEON_PRIO_COLOR_BUFFER_MSAA :
                                              RADEON_PRIO_COLOR_BUFFER);

-               if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
-                       radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
-                               tex->cmask_buffer, RADEON_USAGE_READWRITE,
-                               RADEON_PRIO_CMASK);
-               }
-
                if (tex->dcc_separate_buffer)
                        radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
                                                  tex->dcc_separate_buffer,
                                                  RADEON_USAGE_READWRITE,
                                                  RADEON_PRIO_DCC);

                /* Compute mutable surface parameters. */
                cb_color_base = tex->resource.gpu_address >> 8;
                cb_color_fmask = 0;
                cb_dcc_base = 0;
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to