Module: Mesa
Branch: main
Commit: 7893040f807f2b81d03a6a19c577d6d98bcc116c
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=7893040f807f2b81d03a6a19c577d6d98bcc116c

Author: Bas Nieuwenhuizen <[email protected]>
Date:   Mon Dec  5 00:54:14 2022 +0100

radv: Add stricter space checks.

The check for max_dw means that none of checks triggered reliably
when we had an issue. Use a stricter reserved dw measure to increase
the probability of catching issues.

Adds a radeon_check_space to some places after cs_create as they
previously relied on the min. cs size, but that would still trigger
the checks.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20152>

---

 src/amd/vulkan/radv_cs.h                      | 22 ++++++++++++----------
 src/amd/vulkan/radv_pipeline_compute.c        |  2 +-
 src/amd/vulkan/radv_pipeline_graphics.c       |  4 ++--
 src/amd/vulkan/radv_queue.c                   |  1 +
 src/amd/vulkan/radv_radeon_winsys.h           |  1 +
 src/amd/vulkan/radv_sqtt.c                    |  4 ++++
 src/amd/vulkan/si_cmd_buffer.c                |  2 ++
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 10 ++++++++++
 8 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h
index 351d8e2e026..822e0b0797c 100644
--- a/src/amd/vulkan/radv_cs.h
+++ b/src/amd/vulkan/radv_cs.h
@@ -34,8 +34,10 @@
 static inline unsigned
 radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, 
unsigned needed)
 {
+   assert(cs->cdw <= cs->reserved_dw);
    if (cs->max_dw - cs->cdw < needed)
       ws->cs_grow(cs, needed);
+   cs->reserved_dw = MAX2(cs->reserved_dw, cs->cdw + needed);
    return cs->cdw + needed;
 }
 
@@ -43,7 +45,7 @@ static inline void
 radeon_set_config_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
 {
    assert(reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END);
-   assert(cs->cdw + 2 + num <= cs->max_dw);
+   assert(cs->cdw + 2 + num <= cs->reserved_dw);
    assert(num);
    radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
    radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
@@ -60,7 +62,7 @@ static inline void
 radeon_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned 
num)
 {
    assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
-   assert(cs->cdw + 2 + num <= cs->max_dw);
+   assert(cs->cdw + 2 + num <= cs->reserved_dw);
    assert(num);
    radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
    radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
@@ -77,7 +79,7 @@ static inline void
 radeon_set_context_reg_idx(struct radeon_cmdbuf *cs, unsigned reg, unsigned 
idx, unsigned value)
 {
    assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
-   assert(cs->cdw + 3 <= cs->max_dw);
+   assert(cs->cdw + 3 <= cs->reserved_dw);
    radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
    radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
    radeon_emit(cs, value);
@@ -87,7 +89,7 @@ static inline void
 radeon_set_sh_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
 {
    assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
-   assert(cs->cdw + 2 + num <= cs->max_dw);
+   assert(cs->cdw + 2 + num <= cs->reserved_dw);
    assert(num);
    radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
    radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
@@ -105,7 +107,7 @@ radeon_set_sh_reg_idx(const struct radv_physical_device 
*pdevice, struct radeon_
                       unsigned reg, unsigned idx, unsigned value)
 {
    assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
-   assert(cs->cdw + 3 <= cs->max_dw);
+   assert(cs->cdw + 3 <= cs->reserved_dw);
    assert(idx);
 
    unsigned opcode = PKT3_SET_SH_REG_INDEX;
@@ -121,7 +123,7 @@ static inline void
 radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned 
num)
 {
    assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
-   assert(cs->cdw + 2 + num <= cs->max_dw);
+   assert(cs->cdw + 2 + num <= cs->reserved_dw);
    assert(num);
    radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
    radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
@@ -131,7 +133,7 @@ static inline void
 radeon_set_uconfig_reg_seq_perfctr(struct radeon_cmdbuf *cs, unsigned reg, 
unsigned num)
 {
    assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
-   assert(cs->cdw + 2 + num <= cs->max_dw);
+   assert(cs->cdw + 2 + num <= cs->reserved_dw);
    assert(num);
    radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 1));
    radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
@@ -149,7 +151,7 @@ radeon_set_uconfig_reg_idx(const struct 
radv_physical_device *pdevice, struct ra
                            unsigned reg, unsigned idx, unsigned value)
 {
    assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
-   assert(cs->cdw + 3 <= cs->max_dw);
+   assert(cs->cdw + 3 <= cs->reserved_dw);
    assert(idx);
 
    unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
@@ -167,7 +169,7 @@ radeon_set_perfctr_reg(struct radv_cmd_buffer *cmd_buffer, 
unsigned reg, unsigne
 {
    struct radeon_cmdbuf *cs = cmd_buffer->cs;
    assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
-   assert(cs->cdw + 3 <= cs->max_dw);
+   assert(cs->cdw + 3 <= cs->reserved_dw);
 
    /*
     * On GFX10, there is a bug with the ME implementation of its content 
addressable memory (CAM),
@@ -186,7 +188,7 @@ static inline void
 radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs, unsigned reg, 
unsigned value)
 {
    assert(reg < CIK_UCONFIG_REG_OFFSET);
-   assert(cs->cdw + 6 <= cs->max_dw);
+   assert(cs->cdw + 6 <= cs->reserved_dw);
 
    radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
    radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | 
COPY_DATA_DST_SEL(COPY_DATA_PERF));
diff --git a/src/amd/vulkan/radv_pipeline_compute.c 
b/src/amd/vulkan/radv_pipeline_compute.c
index 2c03dec6196..8538c6bf9a5 100644
--- a/src/amd/vulkan/radv_pipeline_compute.c
+++ b/src/amd/vulkan/radv_pipeline_compute.c
@@ -103,7 +103,7 @@ radv_compute_generate_pm4(const struct radv_device *device, 
struct radv_compute_
    struct radv_shader *shader = pipeline->base.shaders[MESA_SHADER_COMPUTE];
    struct radeon_cmdbuf *cs = &pipeline->base.cs;
 
-   cs->max_dw = pdevice->rad_info.gfx_level >= GFX10 ? 19 : 16;
+   cs->reserved_dw = cs->max_dw = pdevice->rad_info.gfx_level >= GFX10 ? 19 : 
16;
    cs->buf = malloc(cs->max_dw * 4);
 
    radv_pipeline_emit_hw_cs(pdevice, cs, shader);
diff --git a/src/amd/vulkan/radv_pipeline_graphics.c 
b/src/amd/vulkan/radv_pipeline_graphics.c
index db5a51608e4..ad902773b3b 100644
--- a/src/amd/vulkan/radv_pipeline_graphics.c
+++ b/src/amd/vulkan/radv_pipeline_graphics.c
@@ -3751,8 +3751,8 @@ radv_pipeline_emit_pm4(const struct radv_device *device, 
struct radv_graphics_pi
    struct radeon_cmdbuf *ctx_cs = &pipeline->base.ctx_cs;
    struct radeon_cmdbuf *cs = &pipeline->base.cs;
 
-   cs->max_dw = 64;
-   ctx_cs->max_dw = 256;
+   cs->reserved_dw = cs->max_dw = 64;
+   ctx_cs->reserved_dw = ctx_cs->max_dw = 256;
    cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw));
    ctx_cs->buf = cs->buf + cs->max_dw;
 
diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c
index f301493af5e..935cfe05980 100644
--- a/src/amd/vulkan/radv_queue.c
+++ b/src/amd/vulkan/radv_queue.c
@@ -1064,6 +1064,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, 
struct radv_device *devi
          goto fail;
       }
 
+      radeon_check_space(ws, cs, 512);
       dest_cs[i] = cs;
 
       if (scratch_bo)
diff --git a/src/amd/vulkan/radv_radeon_winsys.h 
b/src/amd/vulkan/radv_radeon_winsys.h
index c26993588a4..cbd7a0c5687 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -114,6 +114,7 @@ struct radeon_cmdbuf {
     * store and reload them between buf writes. */
    uint64_t cdw;    /* Number of used dwords. */
    uint64_t max_dw; /* Maximum number of dwords. */
+   uint64_t reserved_dw; /* Number of dwords reserved through 
radeon_check_space() */
    uint32_t *buf;   /* The base pointer of the chunk. */
 };
 
diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c
index 6fb081872e1..a64265187e8 100644
--- a/src/amd/vulkan/radv_sqtt.c
+++ b/src/amd/vulkan/radv_sqtt.c
@@ -687,6 +687,8 @@ radv_begin_sqtt(struct radv_queue *queue)
    if (!cs)
       return false;
 
+   radeon_check_space(ws, cs, 256);
+
    switch (family) {
    case RADV_QUEUE_GENERAL:
       radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
@@ -756,6 +758,8 @@ radv_end_sqtt(struct radv_queue *queue)
    if (!cs)
       return false;
 
+   radeon_check_space(ws, cs, 256);
+
    switch (family) {
    case RADV_QUEUE_GENERAL:
       radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 93041c6e9dc..9e1687cd0f7 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -657,6 +657,8 @@ cik_create_gfx_config(struct radv_device *device)
    if (!cs)
       return;
 
+   radeon_check_space(device->ws, cs, 512);
+
    si_emit_graphics(device, cs);
 
    while (cs->cdw & 7) {
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index 84967030b7a..f4054cb2ac1 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -388,6 +388,7 @@ radv_amdgpu_cs_grow(struct radeon_cmdbuf *_cs, size_t 
min_size)
 
    cs->base.buf = (uint32_t *)cs->ib_mapped;
    cs->base.cdw = 0;
+   cs->base.reserved_dw = 0;
    cs->base.max_dw = ib_size / 4 - 4;
 }
 
@@ -397,6 +398,8 @@ radv_amdgpu_cs_finalize(struct radeon_cmdbuf *_cs)
    struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
    enum amd_ip_type ip_type = cs->hw_ip;
 
+   assert(cs->base.cdw <= cs->base.reserved_dw);
+
    uint32_t ib_pad_dw_mask = MAX2(3, cs->ws->info.ib_pad_dw_mask[ip_type]);
    uint32_t nop_packet = get_nop_packet(cs);
 
@@ -442,6 +445,7 @@ radv_amdgpu_cs_reset(struct radeon_cmdbuf *_cs)
 {
    struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
    cs->base.cdw = 0;
+   cs->base.reserved_dw = 0;
    cs->status = VK_SUCCESS;
 
    for (unsigned i = 0; i < cs->num_buffers; ++i) {
@@ -670,6 +674,8 @@ radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf 
*_parent, struct radeon_cm
       if (parent->base.cdw + 4 > parent->base.max_dw)
          radv_amdgpu_cs_grow(&parent->base, 4);
 
+      parent->base.reserved_dw = MAX2(parent->base.reserved_dw, 
parent->base.cdw + 4);
+
       /* Not setting the CHAIN bit will launch an IB2. */
       radeon_emit(&parent->base, PKT3(PKT3_INDIRECT_BUFFER, 2, 0));
       radeon_emit(&parent->base, child->ib.ib_mc_address);
@@ -686,6 +692,8 @@ radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf 
*_parent, struct radeon_cm
          if (parent->base.cdw + ib->cdw > parent->base.max_dw)
             radv_amdgpu_cs_grow(&parent->base, ib->cdw);
 
+         parent->base.reserved_dw = MAX2(parent->base.reserved_dw, 
parent->base.cdw + ib->cdw);
+
          mapped = ws->base.buffer_map(ib->bo);
          if (!mapped) {
             parent->status = VK_ERROR_OUT_OF_HOST_MEMORY;
@@ -704,6 +712,8 @@ radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf 
*_parent, struct radeon_cm
          if (parent->base.cdw + child->base.cdw > parent->base.max_dw)
             radv_amdgpu_cs_grow(&parent->base, child->base.cdw);
 
+         parent->base.reserved_dw = MAX2(parent->base.reserved_dw, 
parent->base.cdw + child->base.cdw);
+
          memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * 
child->base.cdw);
          parent->base.cdw += child->base.cdw;
       }

Reply via email to