Module: Mesa
Branch: main
Commit: 9217c565b2871ba377f23adc571cbe85b6a90c1f
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=9217c565b2871ba377f23adc571cbe85b6a90c1f

Author: Iago Toral Quiroga <[email protected]>
Date:   Mon Apr  3 10:16:16 2023 +0200

v3d,v3dv: stop trying to force 16-bit TMU output for shadow comparisons

In V3D we were doing this incorrectly by peeking into the sampler state
unconditionally, which is not correct if the TMU operations don't use
sampler state at all (like PBOs). This was causing us to fail the second
test in this sequence when both tests run back back to back in the same
process:

dEQP-GLES3.functional.texture.shadow.2d.linear.greater_or_equal_depth_component32f
dEQP-GLES3.functional.texture.specification.teximage2d_pbo.rg32f_cube

Here, the first test would setup sampler state for shadow comparisons and
the second test would setup a PBO upload, which would incorrectly pick
up the sampler state to decide about the TMU output size for the PBO
operation.

In V3DV we were doing this right looking through each texture/sampler
instruction and checking if they all involved shadow comparisons or had
relaxed precission, defaulting to 32-bit otherwise.

This special-casing for shadow comparisons also leaks from drivers
into the compiler where we are forced to emit some pieces of sampler
state for 32-bit outputs, so we had to special-case shadow instructions
there as well and we also had a fix for CS textures not having correct
sampler state representing shadow operations too. Finally,
we also had  at least a couple of bugs where forcing 32-bit TMU output
through V3D_DEBUG wasn't correctly forcing shadow comparisons to actually
be 32-bit in all the right places, leading to visual bugs with the
option enabled (Sponza being one example of this). This change eliminates
all of these issues.

Finally, the performance improvement observed from special casing shadow
comparison is negligible, and in specific scenarios it can even be
detrimental to performance due to increased register pressure (Sponza with
PCF filtering set to 4 is an example of this again).

Fixes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8684
Reviewed-by: Alejandro PiƱeiro <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22284>

---

 src/broadcom/compiler/v3d40_tex.c     |  3 +--
 src/broadcom/compiler/vir.c           | 15 ---------------
 src/broadcom/vulkan/v3dv_pipeline.c   |  2 +-
 src/gallium/drivers/v3d/v3d_context.h |  3 +--
 src/gallium/drivers/v3d/v3d_formats.c |  5 +----
 src/gallium/drivers/v3d/v3d_program.c | 12 +-----------
 src/gallium/drivers/v3d/v3dx_emit.c   |  6 ++----
 src/gallium/drivers/v3d/v3dx_state.c  |  3 +--
 8 files changed, 8 insertions(+), 41 deletions(-)

diff --git a/src/broadcom/compiler/v3d40_tex.c 
b/src/broadcom/compiler/v3d40_tex.c
index db85ac8481b..dab7e477204 100644
--- a/src/broadcom/compiler/v3d40_tex.c
+++ b/src/broadcom/compiler/v3d40_tex.c
@@ -236,8 +236,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr 
*instr)
          * parameter if the output is 32 bit
          */
         bool output_type_32_bit =
-                c->key->sampler[sampler_idx].return_size == 32 &&
-                !instr->is_shadow;
+                c->key->sampler[sampler_idx].return_size == 32;
 
         struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
         };
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 2b36b524343..e79d3a99004 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -643,21 +643,6 @@ v3d_lower_nir(struct v3d_compile *c)
                 }
         }
 
-        /* CS textures may not have return_size reflecting the shadow state. */
-        nir_foreach_uniform_variable(var, c->s) {
-                const struct glsl_type *type = glsl_without_array(var->type);
-                unsigned array_len = MAX2(glsl_get_length(var->type), 1);
-
-                if (!glsl_type_is_sampler(type) ||
-                    !glsl_sampler_type_is_shadow(type))
-                        continue;
-
-                for (int i = 0; i < array_len; i++) {
-                        tex_options.lower_tex_packing[var->data.binding + i] =
-                                nir_lower_tex_packing_16;
-                }
-        }
-
         NIR_PASS(_, c->s, nir_lower_tex, &tex_options);
         NIR_PASS(_, c->s, nir_lower_system_values);
 
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c 
b/src/broadcom/vulkan/v3dv_pipeline.c
index 04143e5f0c8..53e957c2703 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -681,7 +681,7 @@ lower_tex_src(nir_builder *b,
    else  if (V3D_DBG(TMU_32BIT))
       return_size = 32;
    else
-      return_size = relaxed_precision || instr->is_shadow ? 16 : 32;
+      return_size = relaxed_precision ? 16 : 32;
 
    struct v3dv_descriptor_map *map =
       pipeline_get_descriptor_map(state->pipeline, binding_layout->type,
diff --git a/src/gallium/drivers/v3d/v3d_context.h 
b/src/gallium/drivers/v3d/v3d_context.h
index 0734718617d..af2b4121cfc 100644
--- a/src/gallium/drivers/v3d/v3d_context.h
+++ b/src/gallium/drivers/v3d/v3d_context.h
@@ -754,8 +754,7 @@ bool v3d_tex_format_supported(const struct v3d_device_info 
*devinfo,
 uint8_t v3d_get_rt_format(const struct v3d_device_info *devinfo, enum 
pipe_format f);
 uint8_t v3d_get_tex_format(const struct v3d_device_info *devinfo, enum 
pipe_format f);
 uint8_t v3d_get_tex_return_size(const struct v3d_device_info *devinfo,
-                                enum pipe_format f,
-                                enum pipe_tex_compare compare);
+                                enum pipe_format f);
 uint8_t v3d_get_tex_return_channels(const struct v3d_device_info *devinfo,
                                     enum pipe_format f);
 const uint8_t *v3d_get_format_swizzle(const struct v3d_device_info *devinfo,
diff --git a/src/gallium/drivers/v3d/v3d_formats.c 
b/src/gallium/drivers/v3d/v3d_formats.c
index 43a64360ec4..b9d0ef68d6b 100644
--- a/src/gallium/drivers/v3d/v3d_formats.c
+++ b/src/gallium/drivers/v3d/v3d_formats.c
@@ -95,7 +95,7 @@ v3d_get_tex_format(const struct v3d_device_info *devinfo, 
enum pipe_format f)
 
 uint8_t
 v3d_get_tex_return_size(const struct v3d_device_info *devinfo,
-                        enum pipe_format f, enum pipe_tex_compare compare)
+                        enum pipe_format f)
 {
         const struct v3d_format *vf = get_format(devinfo, f);
 
@@ -108,9 +108,6 @@ v3d_get_tex_return_size(const struct v3d_device_info 
*devinfo,
         if (V3D_DBG(TMU_32BIT))
                 return 32;
 
-        if (compare == PIPE_TEX_COMPARE_R_TO_TEXTURE)
-                return 16;
-
         return vf->return_size;
 }
 
diff --git a/src/gallium/drivers/v3d/v3d_program.c 
b/src/gallium/drivers/v3d/v3d_program.c
index 0e41799bcc5..828ab7c8b0e 100644
--- a/src/gallium/drivers/v3d/v3d_program.c
+++ b/src/gallium/drivers/v3d/v3d_program.c
@@ -467,22 +467,12 @@ v3d_setup_shared_key(struct v3d_context *v3d, struct 
v3d_key *key,
         for (int i = 0; i < texstate->num_textures; i++) {
                 struct pipe_sampler_view *sampler = texstate->textures[i];
                 struct v3d_sampler_view *v3d_sampler = 
v3d_sampler_view(sampler);
-                struct pipe_sampler_state *sampler_state =
-                        texstate->samplers[i];
 
                 if (!sampler)
                         continue;
 
-                assert(sampler->target == PIPE_BUFFER || sampler_state);
-
-                unsigned compare_mode = sampler_state ?
-                        sampler_state->compare_mode :
-                        PIPE_TEX_COMPARE_NONE;
-
                 key->sampler[i].return_size =
-                        v3d_get_tex_return_size(devinfo,
-                                                sampler->format,
-                                                compare_mode);
+                        v3d_get_tex_return_size(devinfo, sampler->format);
 
                 /* For 16-bit, we set up the sampler to always return 2
                  * channels (meaning no recompiles for most statechanges),
diff --git a/src/gallium/drivers/v3d/v3dx_emit.c 
b/src/gallium/drivers/v3d/v3dx_emit.c
index a35031e2fcd..e25b2609a43 100644
--- a/src/gallium/drivers/v3d/v3dx_emit.c
+++ b/src/gallium/drivers/v3d/v3dx_emit.c
@@ -97,8 +97,7 @@ swizzled_border_color(const struct v3d_device_info *devinfo,
          * For swizzling in the shader, we don't do any pre-swizzling of the
          * border color.
          */
-        if (v3d_get_tex_return_size(devinfo, sview->base.format,
-                                    sampler->compare_mode) != 32)
+        if (v3d_get_tex_return_size(devinfo, sview->base.format) != 32)
                 swiz = desc->swizzle[swiz];
 
         switch (swiz) {
@@ -131,8 +130,7 @@ emit_one_texture(struct v3d_context *v3d, struct 
v3d_texture_stateobj *stage_tex
         v3d_bo_set_reference(&stage_tex->texture_state[i].bo,
                              job->indirect.bo);
 
-        uint32_t return_size = v3d_get_tex_return_size(devinfo, psview->format,
-                                                       psampler->compare_mode);
+        uint32_t return_size = v3d_get_tex_return_size(devinfo, 
psview->format);
 
         struct V3D33_TEXTURE_SHADER_STATE unpacked = {
                 /* XXX */
diff --git a/src/gallium/drivers/v3d/v3dx_state.c 
b/src/gallium/drivers/v3d/v3dx_state.c
index 56d2769b17b..1795b9fa7b9 100644
--- a/src/gallium/drivers/v3d/v3dx_state.c
+++ b/src/gallium/drivers/v3d/v3dx_state.c
@@ -1114,8 +1114,7 @@ v3d_create_sampler_view(struct pipe_context *pctx, struct 
pipe_resource *prsc,
                         }
                 }
         } else {
-                if (v3d_get_tex_return_size(&screen->devinfo, sample_format,
-                                           PIPE_TEX_COMPARE_NONE) == 32) {
+                if (v3d_get_tex_return_size(&screen->devinfo, sample_format) 
== 32) {
                         if (util_format_is_alpha(sample_format))
                                 so->sampler_variant = V3D_SAMPLER_STATE_32_A;
                         else

Reply via email to