Previously we were not clamping depth at all. I guess this meant that
everything got clamped to the [0, 1] range, which was enough to pass the
existing tests. Or perhaps the clamping only happened to the rasterized
depth value and not to the fragment shader's output depth value. Either
way, the lack of clamping broke dolphin's new depth implementation, which
seems to work better with this patch.

Tested on a4xx but not a3xx.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97231
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---
 src/gallium/drivers/freedreno/a3xx/a3xx.xml.h |  2 +-
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 30 ++++++++++++++++++++++++++-
 src/gallium/drivers/freedreno/a4xx/a4xx.xml.h |  2 +-
 src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 29 +++++++++++++++++++++++++-
 4 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h 
b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index dcb6dfb..bf787d1 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -1472,7 +1472,7 @@ static inline uint32_t A3XX_RB_DEPTH_CONTROL_ZFUNC(enum 
adreno_compare_func val)
 {
        return ((val) << A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & 
A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK;
 }
-#define A3XX_RB_DEPTH_CONTROL_BF_ENABLE                                
0x00000080
+#define A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE                   0x00000080
 #define A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE                    0x80000000
 
 #define REG_A3XX_RB_DEPTH_CLEAR                                        
0x00002101
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c 
b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 0fb2ee1..130223c 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -31,6 +31,7 @@
 #include "util/u_memory.h"
 #include "util/u_helpers.h"
 #include "util/u_format.h"
+#include "util/u_viewport.h"
 
 #include "freedreno_resource.h"
 #include "freedreno_query_hw.h"
@@ -536,7 +537,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer 
*ring,
                                
A3XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
        }
 
-       if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
+       if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
                uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_depth_control;
                if (fp->writes_pos) {
                        val |= A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z;
@@ -545,6 +546,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer 
*ring,
                if (fp->has_kill) {
                        val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
                }
+               if (!ctx->rasterizer->depth_clip) {
+                       val |= A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE;
+               }
                OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
                OUT_RING(ring, val);
        }
@@ -648,6 +652,30 @@ fd3_emit_state(struct fd_context *ctx, struct 
fd_ringbuffer *ring,
                OUT_RING(ring, 
A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
        }
 
+       if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | 
FD_DIRTY_FRAMEBUFFER)) {
+               float zmin, zmax;
+               int depth = 24;
+               if (ctx->batch->framebuffer.zsbuf) {
+                       depth = util_format_get_component_bits(
+                                       
pipe_surface_format(ctx->batch->framebuffer.zsbuf),
+                                       UTIL_FORMAT_COLORSPACE_ZS, 0);
+               }
+               util_viewport_zmin_zmax(&ctx->viewport, 
ctx->rasterizer->clip_halfz,
+                                                               &zmin, &zmax);
+
+               OUT_PKT0(ring, REG_A3XX_RB_Z_CLAMP_MIN, 2);
+               if (depth == 32) {
+                       OUT_RING(ring, fui(zmin));
+                       OUT_RING(ring, fui(zmax));
+               } else if (depth == 16) {
+                       OUT_RING(ring, (uint32_t)(zmin * 0xffff));
+                       OUT_RING(ring, (uint32_t)(zmax * 0xffff));
+               } else {
+                       OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
+                       OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
+               }
+       }
+
        if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER | 
FD_DIRTY_BLEND_DUAL)) {
                struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
                int nr_cbufs = pfb->nr_cbufs;
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h 
b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index d9a7bb5..8e8fedb 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -1376,7 +1376,7 @@ static inline uint32_t A4XX_RB_DEPTH_CONTROL_ZFUNC(enum 
adreno_compare_func val)
 {
        return ((val) << A4XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & 
A4XX_RB_DEPTH_CONTROL_ZFUNC__MASK;
 }
-#define A4XX_RB_DEPTH_CONTROL_BF_ENABLE                                
0x00000080
+#define A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE                   0x00000080
 #define A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE                  0x00010000
 #define A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS                        
0x00020000
 #define A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE                    0x80000000
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c 
b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index e0f413f..fc0e4d1 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -31,6 +31,7 @@
 #include "util/u_memory.h"
 #include "util/u_helpers.h"
 #include "util/u_format.h"
+#include "util/u_viewport.h"
 
 #include "freedreno_resource.h"
 #include "freedreno_query_hw.h"
@@ -550,12 +551,14 @@ fd4_emit_state(struct fd_context *ctx, struct 
fd_ringbuffer *ring,
                                
A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
        }
 
-       if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
+       if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
                struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
                bool fragz = fp->has_kill | fp->writes_pos;
+               bool clamp = !ctx->rasterizer->depth_clip;
 
                OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
                OUT_RING(ring, zsa->rb_depth_control |
+                               COND(clamp, 
A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE) |
                                COND(fragz, 
A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) |
                                COND(fragz && fp->frag_coord, 
A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS));
 
@@ -642,6 +645,30 @@ fd4_emit_state(struct fd_context *ctx, struct 
fd_ringbuffer *ring,
                OUT_RING(ring, 
A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
        }
 
+       if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | 
FD_DIRTY_FRAMEBUFFER)) {
+               float zmin, zmax;
+               int depth = 24;
+               if (ctx->batch->framebuffer.zsbuf) {
+                       depth = util_format_get_component_bits(
+                                       
pipe_surface_format(ctx->batch->framebuffer.zsbuf),
+                                       UTIL_FORMAT_COLORSPACE_ZS, 0);
+               }
+               util_viewport_zmin_zmax(&ctx->viewport, 
ctx->rasterizer->clip_halfz,
+                                                               &zmin, &zmax);
+
+               OUT_PKT0(ring, REG_A4XX_RB_VPORT_Z_CLAMP(0), 2);
+               if (depth == 32) {
+                       OUT_RING(ring, fui(zmin));
+                       OUT_RING(ring, fui(zmax));
+               } else if (depth == 16) {
+                       OUT_RING(ring, (uint32_t)(zmin * 0xffff));
+                       OUT_RING(ring, (uint32_t)(zmax * 0xffff));
+               } else {
+                       OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
+                       OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
+               }
+       }
+
        if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
                struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
                unsigned n = pfb->nr_cbufs;
-- 
2.7.3

_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno

Reply via email to