Re: [Mesa-dev] [PATCH 1/2] radeonsi: don't send data after write-confirm with BOTTOM_OF_PIPE_TS

2018-11-13 Thread Dieter Nützel

For the series

Tested-by: Dieter Nützel 

mpv drops notably, apart from the fact that '--vo=opengl-hq' isn't available any 
longer; it was replaced by '--vo=gpu'.


Dieter

Am 13.11.2018 22:23, schrieb Marek Olšák:

From: Marek Olšák 

There are no writes.
---
 src/gallium/drivers/radeonsi/si_fence.c   | 3 +--
 src/gallium/drivers/radeonsi/si_perfcounter.c | 3 +--
 src/gallium/drivers/radeonsi/si_query.c   | 8 +++-----
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_fence.c
b/src/gallium/drivers/radeonsi/si_fence.c
index 3f22ee31ae8..d385f445774 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -270,22 +270,21 @@ static void si_fine_fence_set(struct si_context 
*ctx,

radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(cs, fence_va);
radeon_emit(cs, fence_va >> 32);
radeon_emit(cs, 0x80000000);
} else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) {
si_cp_release_mem(ctx,
  V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DST_SEL_MEM,
- EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
+ EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
  EOP_DATA_SEL_VALUE_32BIT,
  NULL, fence_va, 0x80000000,
  PIPE_QUERY_GPU_FINISHED);
} else {
assert(false);
}
 }

 static boolean si_fence_finish(struct pipe_screen *screen,
   struct pipe_context *ctx,
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c
b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 2ca6d2d7410..cea7d57e518 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -574,22 +574,21 @@ static void si_pc_emit_start(struct si_context 
*sctx,

 }

 /* Note: The buffer was already added in si_pc_emit_start, so we don't 
have to

  * do it again in here. */
 static void si_pc_emit_stop(struct si_context *sctx,
struct r600_resource *buffer, uint64_t va)
 {
struct radeon_cmdbuf *cs = sctx->gfx_cs;

si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DST_SEL_MEM,
- EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
+ EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
  EOP_DATA_SEL_VALUE_32BIT,
  buffer, va, 0, SI_NOT_QUERY);
si_cp_wait_mem(sctx, va, 0, 0xffffffff, 0);

radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 	radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | 
EVENT_INDEX(0));

radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 	radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | 
EVENT_INDEX(0));

radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
   S_036020_PERFMON_STATE(V_036020_STOP_COUNTING) |
diff --git a/src/gallium/drivers/radeonsi/si_query.c
b/src/gallium/drivers/radeonsi/si_query.c
index 9b09c74d48a..21b9aeeac28 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -883,23 +883,22 @@ static void si_query_hw_do_emit_stop(struct
si_context *sctx,
break;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
va += 16;
for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream)
emit_sample_streamout(cs, va + 32 * stream, stream);
break;
case PIPE_QUERY_TIME_ELAPSED:
va += 8;
/* fall through */
case PIPE_QUERY_TIMESTAMP:
-   si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS,
- 0, EOP_DST_SEL_MEM,
- EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
+   si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
  EOP_DATA_SEL_TIMESTAMP, NULL, va,
  0, query->b.type);
fence_va = va + 8;
break;
case PIPE_QUERY_PIPELINE_STATISTICS: {
unsigned sample_size = (query->result_size - 8) / 2;

va += sample_size;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | 
EVENT_INDEX(2));

@@ -910,22 +909,21 @@ static void si_query_hw_do_emit_stop(struct
si_context *sctx,
break;
}
default:
assert(0);
}

[Mesa-dev] [PATCH 1/2] radeonsi: don't send data after write-confirm with BOTTOM_OF_PIPE_TS

2018-11-13 Thread Marek Olšák
From: Marek Olšák 

There are no writes.
---
 src/gallium/drivers/radeonsi/si_fence.c   | 3 +--
 src/gallium/drivers/radeonsi/si_perfcounter.c | 3 +--
 src/gallium/drivers/radeonsi/si_query.c   | 8 +++-----
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_fence.c 
b/src/gallium/drivers/radeonsi/si_fence.c
index 3f22ee31ae8..d385f445774 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -270,22 +270,21 @@ static void si_fine_fence_set(struct si_context *ctx,
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(cs, fence_va);
radeon_emit(cs, fence_va >> 32);
radeon_emit(cs, 0x80000000);
} else if (flags & PIPE_FLUSH_BOTTOM_OF_PIPE) {
si_cp_release_mem(ctx,
  V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DST_SEL_MEM,
- EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
+ EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
  EOP_DATA_SEL_VALUE_32BIT,
  NULL, fence_va, 0x80000000,
  PIPE_QUERY_GPU_FINISHED);
} else {
assert(false);
}
 }
 
 static boolean si_fence_finish(struct pipe_screen *screen,
   struct pipe_context *ctx,
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c 
b/src/gallium/drivers/radeonsi/si_perfcounter.c
index 2ca6d2d7410..cea7d57e518 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -574,22 +574,21 @@ static void si_pc_emit_start(struct si_context *sctx,
 }
 
 /* Note: The buffer was already added in si_pc_emit_start, so we don't have to
  * do it again in here. */
 static void si_pc_emit_stop(struct si_context *sctx,
struct r600_resource *buffer, uint64_t va)
 {
struct radeon_cmdbuf *cs = sctx->gfx_cs;
 
si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DST_SEL_MEM,
- EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
+ EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
  EOP_DATA_SEL_VALUE_32BIT,
  buffer, va, 0, SI_NOT_QUERY);
si_cp_wait_mem(sctx, va, 0, 0xffffffff, 0);
 
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | 
EVENT_INDEX(0));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
   S_036020_PERFMON_STATE(V_036020_STOP_COUNTING) |
diff --git a/src/gallium/drivers/radeonsi/si_query.c 
b/src/gallium/drivers/radeonsi/si_query.c
index 9b09c74d48a..21b9aeeac28 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -883,23 +883,22 @@ static void si_query_hw_do_emit_stop(struct si_context 
*sctx,
break;
case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
va += 16;
for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream)
emit_sample_streamout(cs, va + 32 * stream, stream);
break;
case PIPE_QUERY_TIME_ELAPSED:
va += 8;
/* fall through */
case PIPE_QUERY_TIMESTAMP:
-   si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS,
- 0, EOP_DST_SEL_MEM,
- EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
+   si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+ EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
  EOP_DATA_SEL_TIMESTAMP, NULL, va,
  0, query->b.type);
fence_va = va + 8;
break;
case PIPE_QUERY_PIPELINE_STATISTICS: {
unsigned sample_size = (query->result_size - 8) / 2;
 
va += sample_size;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | 
EVENT_INDEX(2));
@@ -910,22 +909,21 @@ static void si_query_hw_do_emit_stop(struct si_context 
*sctx,
break;
}
default:
assert(0);
}
radeon_add_to_buffer_list(sctx, sctx->gfx_cs, query->buffer.buf, 
RADEON_USAGE_WRITE,
  RADEON_PRIO_QUERY);
 
if (fence_va) {