From: Marek Olšák <[email protected]>
---
src/amd/common/sid.h | 11 +++++++++--
src/amd/vulkan/radv_cmd_buffer.c | 6 +++---
src/amd/vulkan/radv_query.c | 8 ++++----
src/gallium/drivers/radeonsi/si_compute.c | 2 +-
src/gallium/drivers/radeonsi/si_perfcounter.c | 6 +++---
src/gallium/drivers/radeonsi/si_query.c | 2 +-
src/gallium/drivers/radeonsi/si_state_draw.c | 2 +-
7 files changed, 22 insertions(+), 15 deletions(-)
diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index d20b5484223..b3321ea3a77 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -153,28 +153,35 @@
#define R_3F2_CONTROL 0x3F2
#define S_3F2_IB_SIZE(x) (((unsigned)(x) & 0xfffff) << 0)
#define G_3F2_IB_SIZE(x) (((unsigned)(x) >> 0) & 0xfffff)
#define S_3F2_CHAIN(x) (((unsigned)(x) & 0x1) << 20)
#define G_3F2_CHAIN(x) (((unsigned)(x) >> 20) & 0x1)
#define S_3F2_VALID(x) (((unsigned)(x) & 0x1) << 23)
#define PKT3_COPY_DATA 0x40
#define COPY_DATA_SRC_SEL(x) ((x) & 0xf)
#define COPY_DATA_REG 0
-#define COPY_DATA_MEM 1
+#define COPY_DATA_SRC_MEM 1 /* only valid as
source */
+#define COPY_DATA_TC_L2 2
+#define COPY_DATA_GDS 3
#define COPY_DATA_PERF 4
#define COPY_DATA_IMM 5
#define COPY_DATA_TIMESTAMP 9
#define COPY_DATA_DST_SEL(x) (((unsigned)(x) & 0xf)
<< 8)
-#define COPY_DATA_MEM_ASYNC 5
+#define COPY_DATA_DST_MEM_GRBM 1 /* sync across GRBM,
deprecated */
+#define COPY_DATA_TC_L2 2
+#define COPY_DATA_GDS 3
+#define COPY_DATA_PERF 4
+#define COPY_DATA_DST_MEM 5
#define COPY_DATA_COUNT_SEL (1 << 16)
#define COPY_DATA_WR_CONFIRM (1 << 20)
+#define COPY_DATA_ENGINE_PFP (1 << 30)
#define PKT3_PFP_SYNC_ME 0x42
#define PKT3_SURFACE_SYNC 0x43 /* deprecated on CIK, use
ACQUIRE_MEM */
#define PKT3_ME_INITIALIZE 0x44 /* not on CIK */
#define PKT3_COND_WRITE 0x45
#define PKT3_EVENT_WRITE 0x46
#define PKT3_EVENT_WRITE_EOP 0x47 /* not on GFX9 */
#define EOP_INT_SEL(x) ((x) << 24)
#define EOP_INT_SEL_NONE 0
#define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3
#define EOP_DATA_SEL(x) ((x) << 29)
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index d492456d6b8..339704990e2 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1290,21 +1290,21 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer
*cmd_buffer,
if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
++reg_count;
} else {
++reg_offset;
va += 4;
}
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
++reg_count;
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
COPY_DATA_DST_SEL(COPY_DATA_REG) |
(reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, (R_028028_DB_STENCIL_CLEAR + 4 * reg_offset) >> 2);
radeon_emit(cs, 0);
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, 0);
}
@@ -1420,21 +1420,21 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer
*cmd_buffer,
uint64_t va = radv_buffer_get_va(image->bo);
va += image->offset + image->clear_value_offset;
if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
return;
uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
COPY_DATA_DST_SEL(COPY_DATA_REG) |
COPY_DATA_COUNT_SEL);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, reg >> 2);
radeon_emit(cs, 0);
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0,
cmd_buffer->state.predicating));
radeon_emit(cs, 0);
}
@@ -3734,21 +3734,21 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer
*cmd_buffer,
if (info->indirect) {
uint64_t va = radv_buffer_get_va(info->indirect->bo);
va += info->indirect->offset + info->indirect_offset;
radv_cs_add_buffer(ws, cs, info->indirect->bo);
if (loc->sgpr_idx != -1) {
for (unsigned i = 0; i < 3; ++i) {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs,
COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+ radeon_emit(cs,
COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
COPY_DATA_DST_SEL(COPY_DATA_REG));
radeon_emit(cs, (va + 4 * i));
radeon_emit(cs, (va + 4 * i) >> 32);
radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0
+ loc->sgpr_idx * 4) >> 2) +
i);
radeon_emit(cs, 0);
}
}
if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index d607d24cfc6..3af56266cea 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1046,31 +1046,31 @@ void radv_CmdCopyQueryPoolResults(
radeon_emit(cs, local_src_va);
radeon_emit(cs, local_src_va >> 32);
radeon_emit(cs, TIMESTAMP_NOT_READY >> 32);
radeon_emit(cs, 0xffffffff);
radeon_emit(cs, 4);
}
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
uint64_t avail_dest_va = dest_va + elem_size;
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs,
COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
-
COPY_DATA_DST_SEL(COPY_DATA_MEM));
+ radeon_emit(cs,
COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
+
COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM));
radeon_emit(cs, local_src_va);
radeon_emit(cs, local_src_va >> 32);
radeon_emit(cs, avail_dest_va);
radeon_emit(cs, avail_dest_va >> 32);
}
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_MEM) |
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
+
COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) |
((flags & VK_QUERY_RESULT_64_BIT) ?
COPY_DATA_COUNT_SEL : 0));
radeon_emit(cs, local_src_va);
radeon_emit(cs, local_src_va >> 32);
radeon_emit(cs, dest_va);
radeon_emit(cs, dest_va >> 32);
assert(cs->cdw <= cdw_max);
}
break;
diff --git a/src/gallium/drivers/radeonsi/si_compute.c
b/src/gallium/drivers/radeonsi/si_compute.c
index e0c6902fec4..cbcd8e79c7b 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -727,21 +727,21 @@ static void si_setup_tgsi_user_data(struct si_context
*sctx,
uint64_t base_va =
r600_resource(info->indirect)->gpu_address;
uint64_t va = base_va + info->indirect_offset;
int i;
radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
r600_resource(info->indirect),
RADEON_USAGE_READ,
RADEON_PRIO_DRAW_INDIRECT);
for (i = 0; i < 3; ++i) {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs,
COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+ radeon_emit(cs,
COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
COPY_DATA_DST_SEL(COPY_DATA_REG));
radeon_emit(cs, (va + 4 * i));
radeon_emit(cs, (va + 4 * i) >> 32);
radeon_emit(cs, (grid_size_reg >> 2) + i);
radeon_emit(cs, 0);
}
}
} else {
if (program->uses_grid_size) {
radeon_set_sh_reg_seq(cs, grid_size_reg, 3);
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c
b/src/gallium/drivers/radeonsi/si_perfcounter.c
index c4f6e164fb5..de71572c8aa 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -552,21 +552,21 @@ static void si_pc_emit_select(struct si_context *sctx,
static void si_pc_emit_start(struct si_context *sctx,
struct r600_resource *buffer, uint64_t va)
{
struct radeon_cmdbuf *cs = sctx->gfx_cs;
radeon_add_to_buffer_list(sctx, sctx->gfx_cs, buffer,
RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
- COPY_DATA_DST_SEL(COPY_DATA_MEM));
+ COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM));
radeon_emit(cs, 1); /* immediate */
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
S_036020_PERFMON_STATE(V_036020_DISABLE_AND_RESET));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_START) |
EVENT_INDEX(0));
radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
@@ -609,34 +609,34 @@ static void si_pc_emit_read(struct si_context *sctx,
if (!(regs->layout & SI_PC_FAKE)) {
if (regs->layout & SI_PC_REG_REVERSE)
reg_delta = -reg_delta;
for (idx = 0; idx < count; ++idx) {
if (regs->counters)
reg = regs->counters[idx];
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
- COPY_DATA_DST_SEL(COPY_DATA_MEM) |
+
COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) |
COPY_DATA_COUNT_SEL); /* 64 bits */
radeon_emit(cs, reg >> 2);
radeon_emit(cs, 0); /* unused */
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
va += sizeof(uint64_t);
reg += reg_delta;
}
} else {
for (idx = 0; idx < count; ++idx) {
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
- COPY_DATA_DST_SEL(COPY_DATA_MEM) |
+
COPY_DATA_DST_SEL(COPY_DATA_DST_MEM_GRBM) |
COPY_DATA_COUNT_SEL);
radeon_emit(cs, 0); /* immediate */
radeon_emit(cs, 0);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
va += sizeof(uint64_t);
}
}
}
diff --git a/src/gallium/drivers/radeonsi/si_query.c
b/src/gallium/drivers/radeonsi/si_query.c
index 80e84c23937..bdd7e2c060c 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -792,21 +792,21 @@ static void si_query_hw_do_emit_start(struct si_context
*sctx,
for (unsigned stream = 0; stream < SI_MAX_STREAMS; ++stream)
emit_sample_streamout(cs, va + 32 * stream, stream);
break;
case PIPE_QUERY_TIME_ELAPSED:
/* Write the timestamp from the CP not waiting for
* outstanding draws (top-of-pipe).
*/
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
radeon_emit(cs, COPY_DATA_COUNT_SEL |
COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
- COPY_DATA_DST_SEL(COPY_DATA_MEM_ASYNC));
+ COPY_DATA_DST_SEL(COPY_DATA_DST_MEM));
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) |
EVENT_INDEX(2));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
b/src/gallium/drivers/radeonsi/si_state_draw.c
index b1d7437edb9..fceb9debc47 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -664,21 +664,21 @@ static void si_emit_draw_packets(struct si_context *sctx,
if (info->count_from_stream_output) {
struct si_streamout_target *t =
(struct
si_streamout_target*)info->count_from_stream_output;
uint64_t va = t->buf_filled_size->gpu_address +
t->buf_filled_size_offset;
radeon_set_context_reg(cs,
R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
t->stride_in_dw);
radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
COPY_DATA_DST_SEL(COPY_DATA_REG) |
COPY_DATA_WR_CONFIRM);
radeon_emit(cs, va); /* src address lo */
radeon_emit(cs, va >> 32); /* src address hi */
radeon_emit(cs,
R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
radeon_emit(cs, 0); /* unused */
radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
t->buf_filled_size, RADEON_USAGE_READ,
RADEON_PRIO_SO_FILLED_SIZE);
--
2.17.1
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev