Re: [Mesa-dev] [PATCH 17/17] winsys/amdgpu: always set AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE

2018-04-06 Thread Marek Olšák
On Fri, Apr 6, 2018 at 3:05 AM, Samuel Pitoiset 
wrote:

>
>
> On 04/05/2018 10:54 PM, Marek Olšák wrote:
>
>> On Thu, Apr 5, 2018 at 4:22 AM, Samuel Pitoiset <
>> samuel.pitoi...@gmail.com > wrote:
>>
>> Patches 16-17 are:
>>
>> Reviewed-by: Samuel Pitoiset > >
>>
>> Those two are quite interesting. I will probably update my kernel
>> and experiment something.
>>
>>
>> Patch 16 breaks things and needs more changes.
>>
>
> What does it break?
>

Everything can be broken by that. It depends on the timing between CPU and
GPU work.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 17/17] winsys/amdgpu: always set AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE

2018-04-06 Thread Samuel Pitoiset



On 04/05/2018 10:54 PM, Marek Olšák wrote:
On Thu, Apr 5, 2018 at 4:22 AM, Samuel Pitoiset 
> wrote:


Patches 16-17 are:

Reviewed-by: Samuel Pitoiset >

Those two are quite interesting. I will probably update my kernel
and experiment something.


Patch 16 breaks things and needs more changes.


What does it break?



Marek

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 17/17] winsys/amdgpu: always set AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE

2018-04-05 Thread Dieter Nützel

I've run v2 series yesterday, before the 'Mega cleanup' landed through

glmark2, UH, UV, Blender, FreeCAD, Krita 4.0.0 under KDE Plasma5

on my Polaris 20 / RX580.

With UH and UV I've saw little corruption (flickering) of the 'fps 
counter' in the upper right corner and during 'Benchmark' the 
'Benchmark...' string was somewhat corrupted, too. The right side of 
this text string was flickering. Like there is a big wrongly overlayed 
triangle.


Other than that, series is

Tested-by: Dieter Nützel 

Dieter

Am 04.04.2018 03:59, schrieb Marek Olšák:

From: Marek Olšák 

There is a kernel patch that adds the new flag.
---
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 36 
++-

 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index a3feeb93026..eb050b8fdb2 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -26,20 +26,24 @@
  * of the Software.
  */

 #include "amdgpu_cs.h"
 #include "util/os_time.h"
 #include 
 #include 

 #include "amd/common/sid.h"

+#ifndef AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE
+#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
+#endif
+
 DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)

 /* FENCES */

 static struct pipe_fence_handle *
 amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
 unsigned ip_instance, unsigned ring)
 {
struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);

@@ -801,56 +805,68 @@ static void amdgpu_set_ib_size(struct amdgpu_ib 
*ib)

 }

 static void amdgpu_ib_finalize(struct amdgpu_winsys *ws, struct 
amdgpu_ib *ib)

 {
amdgpu_set_ib_size(ib);
ib->used_ib_space += ib->base.current.cdw * 4;
ib->used_ib_space = align(ib->used_ib_space, 
ws->info.ib_start_alignment);

ib->max_ib_size = MAX2(ib->max_ib_size, ib->base.prev_dw +
ib->base.current.cdw);
 }

-static bool amdgpu_init_cs_context(struct amdgpu_cs_context *cs,
+static bool amdgpu_init_cs_context(struct amdgpu_winsys *ws,
+   struct amdgpu_cs_context *cs,
enum ring_type ring_type)
 {
switch (ring_type) {
case RING_DMA:
   cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_DMA;
   break;

case RING_UVD:
   cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD;
   break;

case RING_UVD_ENC:
   cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD_ENC;
   break;

case RING_VCE:
   cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCE;
   break;

-   case RING_COMPUTE:
-  cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_COMPUTE;
-  break;
-
case RING_VCN_DEC:
   cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_DEC;
   break;

-  case RING_VCN_ENC:
+   case RING_VCN_ENC:
   cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_ENC;
   break;

-   default:
+   case RING_COMPUTE:
case RING_GFX:
-  cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_GFX;
+  cs->ib[IB_MAIN].ip_type = ring_type == RING_GFX ? 
AMDGPU_HW_IP_GFX :
+
AMDGPU_HW_IP_COMPUTE;

+
+  /* The kernel shouldn't invalidate L2 and vL1. The proper place 
for cache
+   * invalidation is the beginning of IBs (the previous commit 
does that),

+   * because completion of an IB doesn't care about the state of
GPU caches,
+   * but the beginning of an IB does. Draw calls from multiple IBs 
can be

+   * executed in parallel, so draw calls from the current IB can
finish after
+   * the next IB starts drawing, and so the cache flush at the end 
of IB

+   * is always late.
+   */
+  if (ws->info.drm_minor >= 26)
+ cs->ib[IB_MAIN].flags = AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE;
   break;
+
+   default:
+  assert(0);
}

memset(cs->buffer_indices_hashlist, -1,
sizeof(cs->buffer_indices_hashlist));
cs->last_added_bo = NULL;
return true;
 }

 static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs)
 {
unsigned i;
@@ -918,26 +934,26 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
cs->flush_data = flush_ctx;
cs->ring_type = ring_type;

struct amdgpu_cs_fence_info fence_info;
fence_info.handle = cs->ctx->user_fence_bo;
fence_info.offset = cs->ring_type;
amdgpu_cs_chunk_fence_info_to_data(_info, 
(void*)>fence_chunk);


cs->main.ib_type = IB_MAIN;

-   if (!amdgpu_init_cs_context(>csc1, ring_type)) {
+   if (!amdgpu_init_cs_context(ctx->ws, >csc1, ring_type)) {
   FREE(cs);
   return NULL;
}

-   if (!amdgpu_init_cs_context(>csc2, ring_type)) {
+   if (!amdgpu_init_cs_context(ctx->ws, >csc2, ring_type)) {
   amdgpu_destroy_cs_context(>csc1);
   FREE(cs);
   return NULL;
}

/* Set the first submission context as current. */
cs->csc = >csc1;
cs->cst = >csc2;

if (!amdgpu_get_new_ib(>ws->base, cs, IB_MAIN)) 

Re: [Mesa-dev] [PATCH 17/17] winsys/amdgpu: always set AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE

2018-04-05 Thread Marek Olšák
On Thu, Apr 5, 2018 at 4:22 AM, Samuel Pitoiset 
wrote:

> Patches 16-17 are:
>
> Reviewed-by: Samuel Pitoiset 
>
> Those two are quite interesting. I will probably update my kernel and
> experiment something.


Patch 16 breaks things and needs more changes.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 17/17] winsys/amdgpu: always set AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE

2018-04-05 Thread Samuel Pitoiset

Patches 16-17 are:

Reviewed-by: Samuel Pitoiset 

Those two are quite interesting. I will probably update my kernel and 
experiment something.


On 04/04/2018 03:59 AM, Marek Olšák wrote:

From: Marek Olšák 

There is a kernel patch that adds the new flag.
---
  src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 36 ++-
  1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index a3feeb93026..eb050b8fdb2 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -26,20 +26,24 @@
   * of the Software.
   */
  
  #include "amdgpu_cs.h"

  #include "util/os_time.h"
  #include 
  #include 
  
  #include "amd/common/sid.h"
  
+#ifndef AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE

+#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
+#endif
+
  DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)
  
  /* FENCES */
  
  static struct pipe_fence_handle *

  amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
  unsigned ip_instance, unsigned ring)
  {
 struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);
  
@@ -801,56 +805,68 @@ static void amdgpu_set_ib_size(struct amdgpu_ib *ib)

  }
  
  static void amdgpu_ib_finalize(struct amdgpu_winsys *ws, struct amdgpu_ib *ib)

  {
 amdgpu_set_ib_size(ib);
 ib->used_ib_space += ib->base.current.cdw * 4;
 ib->used_ib_space = align(ib->used_ib_space, ws->info.ib_start_alignment);
 ib->max_ib_size = MAX2(ib->max_ib_size, ib->base.prev_dw + 
ib->base.current.cdw);
  }
  
-static bool amdgpu_init_cs_context(struct amdgpu_cs_context *cs,

+static bool amdgpu_init_cs_context(struct amdgpu_winsys *ws,
+   struct amdgpu_cs_context *cs,
 enum ring_type ring_type)
  {
 switch (ring_type) {
 case RING_DMA:
cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_DMA;
break;
  
 case RING_UVD:

cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD;
break;
  
 case RING_UVD_ENC:

cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD_ENC;
break;
  
 case RING_VCE:

cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCE;
break;
  
-   case RING_COMPUTE:

-  cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_COMPUTE;
-  break;
-
 case RING_VCN_DEC:
cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_DEC;
break;
  
-  case RING_VCN_ENC:

+   case RING_VCN_ENC:
cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_ENC;
break;
  
-   default:

+   case RING_COMPUTE:
 case RING_GFX:
-  cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_GFX;
+  cs->ib[IB_MAIN].ip_type = ring_type == RING_GFX ? AMDGPU_HW_IP_GFX :
+AMDGPU_HW_IP_COMPUTE;
+
+  /* The kernel shouldn't invalidate L2 and vL1. The proper place for cache
+   * invalidation is the beginning of IBs (the previous commit does that),
+   * because completion of an IB doesn't care about the state of GPU 
caches,
+   * but the beginning of an IB does. Draw calls from multiple IBs can be
+   * executed in parallel, so draw calls from the current IB can finish 
after
+   * the next IB starts drawing, and so the cache flush at the end of IB
+   * is always late.
+   */
+  if (ws->info.drm_minor >= 26)
+ cs->ib[IB_MAIN].flags = AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE;
break;
+
+   default:
+  assert(0);
 }
  
 memset(cs->buffer_indices_hashlist, -1, sizeof(cs->buffer_indices_hashlist));

 cs->last_added_bo = NULL;
 return true;
  }
  
  static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs)

  {
 unsigned i;
@@ -918,26 +934,26 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
 cs->flush_data = flush_ctx;
 cs->ring_type = ring_type;
  
 struct amdgpu_cs_fence_info fence_info;

 fence_info.handle = cs->ctx->user_fence_bo;
 fence_info.offset = cs->ring_type;
 amdgpu_cs_chunk_fence_info_to_data(_info, (void*)>fence_chunk);
  
 cs->main.ib_type = IB_MAIN;
  
-   if (!amdgpu_init_cs_context(>csc1, ring_type)) {

+   if (!amdgpu_init_cs_context(ctx->ws, >csc1, ring_type)) {
FREE(cs);
return NULL;
 }
  
-   if (!amdgpu_init_cs_context(>csc2, ring_type)) {

+   if (!amdgpu_init_cs_context(ctx->ws, >csc2, ring_type)) {
amdgpu_destroy_cs_context(>csc1);
FREE(cs);
return NULL;
 }
  
 /* Set the first submission context as current. */

 cs->csc = >csc1;
 cs->cst = >csc2;
  
 if (!amdgpu_get_new_ib(>ws->base, cs, IB_MAIN)) {



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/17] winsys/amdgpu: always set AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE

2018-04-03 Thread Marek Olšák
From: Marek Olšák 

There is a kernel patch that adds the new flag.
---
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 36 ++-
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index a3feeb93026..eb050b8fdb2 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -26,20 +26,24 @@
  * of the Software.
  */
 
 #include "amdgpu_cs.h"
 #include "util/os_time.h"
 #include 
 #include 
 
 #include "amd/common/sid.h"
 
+#ifndef AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE
+#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
+#endif
+
 DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)
 
 /* FENCES */
 
 static struct pipe_fence_handle *
 amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
 unsigned ip_instance, unsigned ring)
 {
struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);
 
@@ -801,56 +805,68 @@ static void amdgpu_set_ib_size(struct amdgpu_ib *ib)
 }
 
 static void amdgpu_ib_finalize(struct amdgpu_winsys *ws, struct amdgpu_ib *ib)
 {
amdgpu_set_ib_size(ib);
ib->used_ib_space += ib->base.current.cdw * 4;
ib->used_ib_space = align(ib->used_ib_space, ws->info.ib_start_alignment);
ib->max_ib_size = MAX2(ib->max_ib_size, ib->base.prev_dw + 
ib->base.current.cdw);
 }
 
-static bool amdgpu_init_cs_context(struct amdgpu_cs_context *cs,
+static bool amdgpu_init_cs_context(struct amdgpu_winsys *ws,
+   struct amdgpu_cs_context *cs,
enum ring_type ring_type)
 {
switch (ring_type) {
case RING_DMA:
   cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_DMA;
   break;
 
case RING_UVD:
   cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD;
   break;
 
case RING_UVD_ENC:
   cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_UVD_ENC;
   break;
 
case RING_VCE:
   cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCE;
   break;
 
-   case RING_COMPUTE:
-  cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_COMPUTE;
-  break;
-
case RING_VCN_DEC:
   cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_DEC;
   break;
 
-  case RING_VCN_ENC:
+   case RING_VCN_ENC:
   cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_VCN_ENC;
   break;
 
-   default:
+   case RING_COMPUTE:
case RING_GFX:
-  cs->ib[IB_MAIN].ip_type = AMDGPU_HW_IP_GFX;
+  cs->ib[IB_MAIN].ip_type = ring_type == RING_GFX ? AMDGPU_HW_IP_GFX :
+AMDGPU_HW_IP_COMPUTE;
+
+  /* The kernel shouldn't invalidate L2 and vL1. The proper place for cache
+   * invalidation is the beginning of IBs (the previous commit does that),
+   * because completion of an IB doesn't care about the state of GPU 
caches,
+   * but the beginning of an IB does. Draw calls from multiple IBs can be
+   * executed in parallel, so draw calls from the current IB can finish 
after
+   * the next IB starts drawing, and so the cache flush at the end of IB
+   * is always late.
+   */
+  if (ws->info.drm_minor >= 26)
+ cs->ib[IB_MAIN].flags = AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE;
   break;
+
+   default:
+  assert(0);
}
 
memset(cs->buffer_indices_hashlist, -1, 
sizeof(cs->buffer_indices_hashlist));
cs->last_added_bo = NULL;
return true;
 }
 
 static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs)
 {
unsigned i;
@@ -918,26 +934,26 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
cs->flush_data = flush_ctx;
cs->ring_type = ring_type;
 
struct amdgpu_cs_fence_info fence_info;
fence_info.handle = cs->ctx->user_fence_bo;
fence_info.offset = cs->ring_type;
amdgpu_cs_chunk_fence_info_to_data(_info, (void*)>fence_chunk);
 
cs->main.ib_type = IB_MAIN;
 
-   if (!amdgpu_init_cs_context(>csc1, ring_type)) {
+   if (!amdgpu_init_cs_context(ctx->ws, >csc1, ring_type)) {
   FREE(cs);
   return NULL;
}
 
-   if (!amdgpu_init_cs_context(>csc2, ring_type)) {
+   if (!amdgpu_init_cs_context(ctx->ws, >csc2, ring_type)) {
   amdgpu_destroy_cs_context(>csc1);
   FREE(cs);
   return NULL;
}
 
/* Set the first submission context as current. */
cs->csc = >csc1;
cs->cst = >csc2;
 
if (!amdgpu_get_new_ib(>ws->base, cs, IB_MAIN)) {
-- 
2.15.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev