Module: Mesa
Branch: staging/23.1
Commit: ec05c0918a74aa0a0bf8e651c4a796c56457ae57
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ec05c0918a74aa0a0bf8e651c4a796c56457ae57

Author: Danylo Piliaiev <[email protected]>
Date:   Tue Jul 18 14:41:03 2023 +0200

ir3: Fix FS quad ops returning wrong values from helper invocations

Without SP_FS_CTRL_REG0.LODPIXMASK set, quad ops don't read values from
helper invocations; they return the current invocation's value instead.

Fixes:
 dEQP-VK.glsl.derivate.dfdxsubgroup.*
 dEQP-VK.glsl.derivate.dfdysubgroup.*

Cc: mesa-stable

Signed-off-by: Danylo Piliaiev <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24211>
(cherry picked from commit a0d426370db53151ad3683f50083607f6aeda6ef)

---

 .pick_status.json                                 | 2 +-
 src/freedreno/ir3/ir3_compiler_nir.c              | 4 +++-
 src/freedreno/ir3/ir3_legalize.c                  | 2 +-
 src/freedreno/ir3/ir3_shader.h                    | 2 +-
 src/freedreno/vulkan/tu_pipeline.cc               | 2 +-
 src/gallium/drivers/freedreno/a6xx/fd6_program.cc | 2 +-
 6 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index ad95d2163da..b86a24ff1ef 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -1606,7 +1606,7 @@
         "description": "ir3: Fix FS quad ops returning wrong values from 
helper invocations",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null
     },
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c 
b/src/freedreno/ir3/ir3_compiler_nir.c
index bdb2c92ea7f..4926b739b6f 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -4953,8 +4953,10 @@ ir3_compile_shader_nir(struct ir3_compiler *compiler,
    collect_tex_prefetches(ctx, ir);
 
    if (so->type == MESA_SHADER_FRAGMENT &&
-       ctx->s->info.fs.needs_quad_helper_invocations)
+       ctx->s->info.fs.needs_quad_helper_invocations) {
       so->need_pixlod = true;
+      so->need_full_quad = true;
+   }
 
    if ((ctx->so->type == MESA_SHADER_FRAGMENT) &&
        !ctx->s->info.fs.early_fragment_tests)
diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c
index 6939ce5eee7..a344a009ea1 100644
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@@ -400,7 +400,7 @@ apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct 
ir3_block *block)
          struct ir3_instruction *op_p = ir3_instr_clone(n);
          op_p->flags = IR3_INSTR_P;
 
-         ctx->so->need_fine_derivatives = true;
+         ctx->so->need_full_quad = true;
       }
    }
 
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index 24de9a2c2e1..bcb8ba75eb3 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -682,7 +682,7 @@ struct ir3_shader_variant {
    /* do we need derivatives: */
    bool need_pixlod;
 
-   bool need_fine_derivatives;
+   bool need_full_quad;
 
    /* do we need VS driver params? */
    bool need_driver_params;
diff --git a/src/freedreno/vulkan/tu_pipeline.cc 
b/src/freedreno/vulkan/tu_pipeline.cc
index 5610135bfbd..44b0c385ad1 100644
--- a/src/freedreno/vulkan/tu_pipeline.cc
+++ b/src/freedreno/vulkan/tu_pipeline.cc
@@ -504,7 +504,7 @@ tu6_emit_xs(struct tu_cs *cs,
                .branchstack = ir3_shader_branchstack_hw(xs),
                .threadsize = thrsz,
                .varying = xs->total_in != 0,
-               .lodpixmask = xs->need_fine_derivatives,
+               .lodpixmask = xs->need_full_quad,
                /* unknown bit, seems unnecessary */
                .unk24 = true,
                .pixlodenable = xs->need_pixlod,
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.cc 
b/src/gallium/drivers/freedreno/a6xx/fd6_program.cc
index 8b061fa23bd..3054a7a16e8 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_program.cc
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.cc
@@ -895,7 +895,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct 
fd_context *ctx,
       ring,
       A6XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
          COND(enable_varyings, A6XX_SP_FS_CTRL_REG0_VARYING) | 0x1000000 |
-         COND(fs->need_fine_derivatives, A6XX_SP_FS_CTRL_REG0_LODPIXMASK) |
+         COND(fs->need_full_quad, A6XX_SP_FS_CTRL_REG0_LODPIXMASK) |
          A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fs->info.max_reg + 1) |
          A6XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fs->info.max_half_reg + 1) |
          COND(fs->mergedregs, A6XX_SP_FS_CTRL_REG0_MERGEDREGS) |

Reply via email to