For now we skip SI && HAVE_LLVM < 0x0600 for simplicity. We also skip
setting the more accurate masks for some builtin uniforms for now as
it causes some piglit regressions.
---
 src/gallium/drivers/radeonsi/si_shader.c     |  7 +++
 src/gallium/drivers/radeonsi/si_shader_nir.c | 90 ++++++++++++++++++++++++++--
 2 files changed, 91 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 714415edba7..13b588eb68f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2374,8 +2374,15 @@ static LLVMValueRef load_const_buffer_desc(struct 
si_shader_context *ctx, int i)
 static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
 {
        struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+       struct si_shader_selector *sel = ctx->shader->selector;
+
        LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, 
ctx->param_const_and_shader_buffers);
 
+       if (sel->info.const_buffers_declared == 1 &&
+           sel->info.shader_buffers_declared == 0) {
+               return load_const_buffer_desc_fast_path(ctx);
+       }
+
        index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
        index = LLVMBuildAdd(ctx->ac.builder, index,
                             LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), 
"");
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c 
b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 437eefc54d0..e116c23815b 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -600,23 +600,98 @@ void si_nir_scan_shader(const struct nir_shader *nir,
 
        info->num_outputs = num_outputs;
 
+       struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer,
+                                              _mesa_key_pointer_equal);
+
+       unsigned ubo_idx = 1;
        nir_foreach_variable(variable, &nir->uniforms) {
                const struct glsl_type *type = variable->type;
                enum glsl_base_type base_type =
                        glsl_get_base_type(glsl_without_array(type));
                unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type));
 
+               /* Gather buffers declared bitmasks. Note: radeonsi doesn't
+                * really use the mask (other than ubo_idx == 1 for regular
+                * uniforms) its really only used for getting the buffer count
+                * so we don't need to worry about the ordering.
+                */
+               if (variable->interface_type != NULL) {
+                       if (variable->data.mode == nir_var_uniform) {
+
+                               unsigned block_count;
+                               if (base_type != GLSL_TYPE_INTERFACE) {
+                                       struct set_entry *entry =
+                                               _mesa_set_search(ubo_set, 
variable->interface_type);
+
+                                       /* Check if we have already processed
+                                        * a member from this ubo.
+                                        */
+                                       if (entry)
+                                               continue;
+
+                                       block_count = 1;
+                               } else {
+                                       block_count = aoa_size;
+                               }
+
+                               info->const_buffers_declared |= 
u_bit_consecutive(ubo_idx, block_count);
+                               ubo_idx += block_count;
+
+                               _mesa_set_add(ubo_set, 
variable->interface_type);
+                       }
+
+                       if (variable->data.mode == nir_var_shader_storage) {
+                               /* TODO: make this more accurate */
+                               info->shader_buffers_declared =
+                                       u_bit_consecutive(0, 
SI_NUM_SHADER_BUFFERS);
+                       }
+
+                       continue;
+               }
+
                /* We rely on the fact that nir_lower_samplers_as_deref has
                 * eliminated struct dereferences.
                 */
-               if (base_type == GLSL_TYPE_SAMPLER)
+               if (base_type == GLSL_TYPE_SAMPLER) {
                        info->samplers_declared |=
                                u_bit_consecutive(variable->data.binding, 
aoa_size);
-               else if (base_type == GLSL_TYPE_IMAGE)
+
+                       if (variable->data.bindless) {
+                               info->const_buffers_declared |= 1;
+                               info->const_file_max[0] +=
+                                       glsl_count_attribute_slots(type, false);
+                       }
+               } else if (base_type == GLSL_TYPE_IMAGE) {
                        info->images_declared |=
                                u_bit_consecutive(variable->data.binding, 
aoa_size);
+
+                       if (variable->data.bindless) {
+                               info->const_buffers_declared |= 1;
+                               info->const_file_max[0] +=
+                                       glsl_count_attribute_slots(type, false);
+                       }
+               } else if (base_type != GLSL_TYPE_ATOMIC_UINT) {
+                       if (strncmp(variable->name, "state.", 6) == 0 ||
+                           strncmp(variable->name, "gl_", 3) == 0) {
+                               /* FIXME: figure out why piglit tests with 
builtin
+                                * uniforms are failing without this.
+                                */
+                               info->const_buffers_declared =
+                                       u_bit_consecutive(0, 
SI_NUM_CONST_BUFFERS);
+                       } else {
+                               info->const_buffers_declared |= 1;
+                               info->const_file_max[0] +=
+                                       glsl_count_attribute_slots(type, false);
+                       }
+               }
        }
 
+       _mesa_set_destroy(ubo_set, NULL);
+
+       /* This is the max index not max count so we adjust it here */
+       if (info->const_file_max[0] != 0)
+               info->const_file_max[0] -= 1;
+
        info->num_written_clipdistance = nir->info.clip_distance_array_size;
        info->num_written_culldistance = nir->info.cull_distance_array_size;
        info->clipdist_writemask = u_bit_consecutive(0, 
info->num_written_clipdistance);
@@ -625,10 +700,6 @@ void si_nir_scan_shader(const struct nir_shader *nir,
        if (info->processor == PIPE_SHADER_FRAGMENT)
                info->uses_kill = nir->info.fs.uses_discard;
 
-       /* TODO make this more accurate */
-       info->const_buffers_declared = u_bit_consecutive(0, 
SI_NUM_CONST_BUFFERS);
-       info->shader_buffers_declared = u_bit_consecutive(0, 
SI_NUM_SHADER_BUFFERS);
-
        func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
        nir_foreach_block(block, func->impl) {
                nir_foreach_instr(instr, block)
@@ -643,6 +714,13 @@ void si_nir_scan_shader(const struct nir_shader *nir,
 void
 si_lower_nir(struct si_shader_selector* sel)
 {
+       /* Disable const buffer fast path for old LLVM versions */
+       if (sel->screen->info.chip_class == SI && HAVE_LLVM < 0x0600 &&
+           sel->info.const_buffers_declared == 1 &&
+           sel->info.shader_buffers_declared == 0) {
+               sel->info.const_buffers_declared |= 0x2;
+       }
+
        /* Adjust the driver location of inputs and outputs. The state tracker
         * interprets them as slots, while the ac/nir backend interprets them
         * as individual components.
-- 
2.14.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to