On 27.06.2017 02:14, Marek Olšák wrote:
From: Marek Olšák <[email protected]>

Shader key size: 107 -> 47

Nice improvement.


Divisors of 0 and 1 are encoded in the shader key. Greater instance divisors
are loaded from a constant buffer.

The shader code doing the division is huge. Is it something we need to
worry about? Does any app use instance divisors >= 2?

This reminds me of a certain LLVM improvement that I still need to clear.

I doubt instance divisors >= 2 are used. As a data point, Vulkan doesn't support it as a feature at all, IIRC.

Can we get an optimized monolithic shader variant built for shaders that have to fetch? This should help if anybody ever triggers this, because surely not all vertex attributes will require it, and at least the latency of the loads can then be used to perhaps hide some of the divide cost.

Apart from that, patch is

Reviewed-by: Nicolai Hähnle <[email protected]>



VS prolog disassembly:
     s_load_dwordx4 s[12:15], s[0:1], 0x80  ; C00A0300 00000080
     s_nop 0                                ; BF800000
     s_waitcnt lgkmcnt(0)                   ; BF8C007F
     s_buffer_load_dword s14, s[12:15], 0x4 ; C0220386 00000004
     s_waitcnt lgkmcnt(0)                   ; BF8C007F
     v_cvt_f32_u32_e32 v4, s14              ; 7E080C0E
     v_rcp_iflag_f32_e32 v4, v4             ; 7E084704
     v_mul_f32_e32 v4, 0x4f800000, v4       ; 0A0808FF 4F800000
     v_cvt_u32_f32_e32 v4, v4               ; 7E080F04
     v_mul_hi_u32 v5, v4, s14               ; D2860005 00001D04
     v_mul_lo_i32 v6, v4, s14               ; D2850006 00001D04
     v_cmp_eq_u32_e64 s[12:13], 0, v5       ; D0CA000C 00020A80
     v_sub_i32_e32 v5, vcc, 0, v6           ; 340A0C80
     v_cndmask_b32_e64 v5, v6, v5, s[12:13] ; D1000005 00320B06
     v_mul_hi_u32 v5, v5, v4                ; D2860005 00020905
     v_add_i32_e32 v6, vcc, v5, v4          ; 320C0905
     v_subrev_i32_e32 v4, vcc, v5, v4       ; 36080905
     v_cndmask_b32_e64 v4, v4, v6, s[12:13] ; D1000004 00320D04
     v_mul_hi_u32 v5, v4, v1                ; D2860005 00020304
     v_add_i32_e32 v4, vcc, s8, v0          ; 32080008
     v_mul_lo_i32 v6, v5, s14               ; D2850006 00001D05
     v_add_i32_e32 v7, vcc, 1, v5           ; 320E0A81
     v_cmp_ge_u32_e64 s[12:13], v1, v6      ; D0CE000C 00020D01
     v_sub_i32_e32 v6, vcc, v1, v6          ; 340C0D01
     v_cmp_le_u32_e32 vcc, s14, v6          ; 7D960C0E
     v_cndmask_b32_e64 v8, 0, -1, s[12:13]  ; D1000008 00318280
     v_cndmask_b32_e64 v6, 0, -1, vcc       ; D1000006 01A98280
     v_and_b32_e32 v6, v8, v6               ; 260C0D08
     v_cmp_eq_u32_e32 vcc, 0, v6            ; 7D940C80
     v_cndmask_b32_e32 v6, v7, v5, vcc      ; 000C0B07
     v_add_i32_e32 v5, vcc, -1, v5          ; 320A0AC1
     v_cmp_eq_u32_e32 vcc, 0, v8            ; 7D941080
     v_cndmask_b32_e32 v5, v6, v5, vcc      ; 000A0B06
     v_add_i32_e32 v5, vcc, s9, v5          ; 320A0A09
---
  src/gallium/drivers/radeonsi/si_descriptors.c   |  2 +
  src/gallium/drivers/radeonsi/si_pipe.c          |  2 +
  src/gallium/drivers/radeonsi/si_shader.c        | 78 +++++++++++++++++--------
  src/gallium/drivers/radeonsi/si_shader.h        |  9 ++-
  src/gallium/drivers/radeonsi/si_state.c         | 15 +++++
  src/gallium/drivers/radeonsi/si_state.h         |  3 +
  src/gallium/drivers/radeonsi/si_state_shaders.c |  7 ++-
  7 files changed, 88 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 75d2a1d..88f7dce 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -2185,20 +2185,22 @@ void si_emit_graphics_shader_userdata(struct si_context 
*sctx,
                                               
R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS);
                        si_emit_shader_pointer(sctx, descs,
                                               
R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS);
                } else {
                        si_emit_shader_pointer(sctx, descs,
                                               
R_00B230_SPI_SHADER_USER_DATA_GS_0);
                        si_emit_shader_pointer(sctx, descs,
                                               
R_00B330_SPI_SHADER_USER_DATA_ES_0);
                        si_emit_shader_pointer(sctx, descs,
                                               
R_00B430_SPI_SHADER_USER_DATA_HS_0);
+                       si_emit_shader_pointer(sctx, descs,
+                                              
R_00B530_SPI_SHADER_USER_DATA_LS_0);
                }
        }
mask = sctx->shader_pointers_dirty &
               u_bit_consecutive(SI_DESCS_FIRST_SHADER,
                                 SI_DESCS_FIRST_COMPUTE - 
SI_DESCS_FIRST_SHADER);
while (mask) {
                unsigned i = u_bit_scan(&mask);
                unsigned shader = (i - SI_DESCS_FIRST_SHADER) / 
SI_NUM_SHADER_DESCS;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 4088849..a940bb8 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -301,20 +301,22 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen,
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
                        for (i = 0; i < SI_NUM_CONST_BUFFERS; i++) {
                                sctx->b.b.set_constant_buffer(&sctx->b.b, 
shader, i,
                                                              
&sctx->null_const_buf);
                        }
                }
si_set_rw_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS,
                                 &sctx->null_const_buf);
+               si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS,
+                                &sctx->null_const_buf);
                si_set_rw_buffer(sctx, SI_VS_CONST_CLIP_PLANES,
                                 &sctx->null_const_buf);
                si_set_rw_buffer(sctx, SI_PS_CONST_POLY_STIPPLE,
                                 &sctx->null_const_buf);
                si_set_rw_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS,
                                 &sctx->null_const_buf);
/* Clear the NULL constant buffer, because loads should return zeros. */
                sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
                                     sctx->null_const_buf.buffer->width0, 0,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 42b08bf..55d1232 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -305,31 +305,30 @@ get_tcs_out_current_patch_data_offset(struct 
si_shader_context *ctx)
        LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
return LLVMBuildAdd(gallivm->builder, patch0_patch_data_offset,
                            LLVMBuildMul(gallivm->builder, patch_stride,
                                         rel_patch_id, ""),
                            "");
  }
static LLVMValueRef get_instance_index_for_fetch(
        struct si_shader_context *ctx,
-       unsigned param_start_instance, unsigned divisor)
+       unsigned param_start_instance, LLVMValueRef divisor)
  {
        struct gallivm_state *gallivm = &ctx->gallivm;
LLVMValueRef result = LLVMGetParam(ctx->main_fn,
                                           ctx->param_instance_id);
/* The division must be done before START_INSTANCE is added. */
-       if (divisor > 1)
-               result = LLVMBuildUDiv(gallivm->builder, result,
-                               LLVMConstInt(ctx->i32, divisor, 0), "");
+       if (divisor != ctx->i32_1)
+               result = LLVMBuildUDiv(gallivm->builder, result, divisor, "");
return LLVMBuildAdd(gallivm->builder, result,
                            LLVMGetParam(ctx->main_fn, param_start_instance), 
"");
  }
/* Bitcast <4 x float> to <2 x double>, extract the component, and convert
   * to float. */
  static LLVMValueRef extract_double_to_float(struct si_shader_context *ctx,
                                            LLVMValueRef vec4,
                                            unsigned double_index)
@@ -5275,26 +5274,24 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
                FREE(shader);
                shader = NULL;
        }
        return shader;
  }
static void si_dump_shader_key_vs(const struct si_shader_key *key,
                                  const struct si_vs_prolog_bits *prolog,
                                  const char *prefix, FILE *f)
  {
-       fprintf(f, "  %s.instance_divisors = {", prefix);
-       for (int i = 0; i < ARRAY_SIZE(prolog->instance_divisors); i++) {
-               fprintf(f, !i ? "%u" : ", %u",
-                       prolog->instance_divisors[i]);
-       }
-       fprintf(f, "}\n");
+       fprintf(f, "  %s.instance_divisor_is_one = %u\n",
+               prefix, prolog->instance_divisor_is_one);
+       fprintf(f, "  %s.instance_divisor_is_fetched = %u\n",
+               prefix, prolog->instance_divisor_is_fetched);
fprintf(f, " mono.vs.fix_fetch = {");
        for (int i = 0; i < SI_MAX_ATTRIBS; i++)
                fprintf(f, !i ? "%u" : ", %u", key->mono.vs_fix_fetch[i]);
        fprintf(f, "}\n");
  }
static void si_dump_shader_key(unsigned processor, const struct si_shader *shader,
                               FILE *f)
  {
@@ -5596,24 +5593,26 @@ static void si_get_vs_prolog_key(const struct 
tgsi_shader_info *info,
        key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
        key->vs_prolog.as_ls = shader_out->key.as_ls;
if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) {
                key->vs_prolog.as_ls = 1;
                key->vs_prolog.num_merged_next_stage_vgprs = 2;
        } else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) {
                key->vs_prolog.num_merged_next_stage_vgprs = 5;
        }
- /* Set the instanceID flag. */
-       for (unsigned i = 0; i < info->num_inputs; i++)
-               if (key->vs_prolog.states.instance_divisors[i])
-                       shader_out->info.uses_instanceid = true;
+       /* Enable loading the InstanceID VGPR. */
+       uint16_t input_mask = u_bit_consecutive(0, info->num_inputs);
+
+       if ((key->vs_prolog.states.instance_divisor_is_one |
+            key->vs_prolog.states.instance_divisor_is_fetched) & input_mask)
+               shader_out->info.uses_instanceid = true;
  }
/**
   * Compute the PS prolog key, which contains all the information needed to
   * build the PS prolog function, and set related bits in shader->config.
   */
  static void si_get_ps_prolog_key(struct si_shader *shader,
                                 union si_shader_part_key *key,
                                 bool separate_prolog)
  {
@@ -6520,20 +6519,35 @@ si_get_shader_part(struct si_screen *sscreen,
result->next = *list;
        *list = result;
out:
        si_llvm_dispose(&ctx);
        mtx_unlock(&sscreen->shader_parts_mutex);
        return result;
  }
+static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
+{
+       struct gallivm_state *gallivm = &ctx->gallivm;
+       LLVMValueRef ptr[2], list;
+
+       /* Get the pointer to rw buffers. */
+       ptr[0] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS);
+       ptr[1] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS_HI);
+       list = lp_build_gather_values(gallivm, ptr, 2);
+       list = LLVMBuildBitCast(gallivm->builder, list, ctx->i64, "");
+       list = LLVMBuildIntToPtr(gallivm->builder, list,
+                                si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS), 
"");
+       return list;
+}
+
  /**
   * Build the vertex shader prolog function.
   *
   * The inputs are the same as VS (a lot of SGPRs and 4 VGPR system values).
   * All inputs are returned unmodified. The vertex load indices are
   * stored after them, which will be used by the API VS for fetching inputs.
   *
   * For example, the expected outputs for instance_divisors[] = {0, 1, 2} are:
   *   input_v0,
   *   input_v1,
@@ -6602,25 +6616,47 @@ static void si_build_vs_prolog_function(struct 
si_shader_context *ctx,
                LLVMValueRef p = LLVMGetParam(func, i);
                ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
        }
        for (; i < num_params; i++) {
                LLVMValueRef p = LLVMGetParam(func, i);
                p = LLVMBuildBitCast(gallivm->builder, p, ctx->f32, "");
                ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
        }
/* Compute vertex load indices from instance divisors. */
+       LLVMValueRef instance_divisor_constbuf = NULL;
+
+       if (key->vs_prolog.states.instance_divisor_is_fetched) {
+               LLVMValueRef list = si_prolog_get_rw_buffers(ctx);
+               LLVMValueRef buf_index =
+                       LLVMConstInt(ctx->i32, SI_VS_CONST_INSTANCE_DIVISORS, 
0);
+               instance_divisor_constbuf =
+                       ac_build_indexed_load_const(&ctx->ac, list, buf_index);
+       }
+
        for (i = 0; i <= key->vs_prolog.last_input; i++) {
-               unsigned divisor = key->vs_prolog.states.instance_divisors[i];
+               bool divisor_is_one =
+                       key->vs_prolog.states.instance_divisor_is_one & (1u << 
i);
+               bool divisor_is_fetched =
+                       key->vs_prolog.states.instance_divisor_is_fetched & (1u 
<< i);
                LLVMValueRef index;
- if (divisor) {
+               if (divisor_is_one || divisor_is_fetched) {
+                       LLVMValueRef divisor = ctx->i32_1;
+
+                       if (divisor_is_fetched) {
+                               divisor = buffer_load_const(ctx, 
instance_divisor_constbuf,
+                                                           
LLVMConstInt(ctx->i32, i * 4, 0));
+                               divisor = LLVMBuildBitCast(gallivm->builder, 
divisor,
+                                                          ctx->i32, "");
+                       }
+
                        /* InstanceID / Divisor + StartInstance */
                        index = get_instance_index_for_fetch(ctx,
                                                             user_sgpr_base +
                                                             
SI_SGPR_START_INSTANCE,
                                                             divisor);
                } else {
                        /* VertexID + BaseVertex */
                        index = LLVMBuildAdd(gallivm->builder,
                                             LLVMGetParam(func, 
ctx->param_vertex_id),
                                             LLVMGetParam(func, user_sgpr_base +
@@ -6859,29 +6895,21 @@ static void si_build_ps_prolog_function(struct 
si_shader_context *ctx,
        for (i = 0; i < num_params; i++) {
                LLVMValueRef p = LLVMGetParam(func, i);
                ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
        }
/* Polygon stippling. */
        if (key->ps_prolog.states.poly_stipple) {
                /* POS_FIXED_PT is always last. */
                unsigned pos = key->ps_prolog.num_input_sgprs +
                               key->ps_prolog.num_input_vgprs - 1;
-               LLVMValueRef ptr[2], list;
-
-               /* Get the pointer to rw buffers. */
-               ptr[0] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS);
-               ptr[1] = LLVMGetParam(func, SI_SGPR_RW_BUFFERS_HI);
-               list = lp_build_gather_values(gallivm, ptr, 2);
-               list = LLVMBuildBitCast(gallivm->builder, list, ctx->i64, "");
-               list = LLVMBuildIntToPtr(gallivm->builder, list,
-                                         si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS), 
"");
+               LLVMValueRef list = si_prolog_get_rw_buffers(ctx);
si_llvm_emit_polygon_stipple(ctx, list, pos);
        }
if (key->ps_prolog.states.bc_optimize_for_persp ||
            key->ps_prolog.states.bc_optimize_for_linear) {
                unsigned i, base = key->ps_prolog.num_input_sgprs;
                LLVMValueRef center[2], centroid[2], tmp, bc_optimize;
/* The shader should do: if (PRIM_MASK[31]) CENTROID = CENTER;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 6432126..a10067d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -378,21 +378,28 @@ struct si_shader_selector {
   * -> = merged with the next stage
   */
/* Use the byte alignment for all following structure members for optimal
   * shader key memory footprint.
   */
  #pragma pack(push, 1)
/* Common VS bits between the shader key and the prolog key. */
  struct si_vs_prolog_bits {
-       unsigned        instance_divisors[SI_MAX_ATTRIBS];
+       /* - If neither "is_one" nor "is_fetched" has a bit set, the instance
+        *   divisor is 0.
+        * - If "is_one" has a bit set, the instance divisor is 1.
+        * - If "is_fetched" has a bit set, the instance divisor will be loaded
+        *   from the constant buffer.
+        */
+       uint16_t        instance_divisor_is_one;     /* bitmask of inputs */
+       uint16_t        instance_divisor_is_fetched; /* bitmask of inputs */
  };
/* Common TCS bits between the shader key and the epilog key. */
  struct si_tcs_epilog_bits {
        unsigned        prim_mode:3;
        unsigned        tes_reads_tess_factors:1;
  };
struct si_gs_prolog_bits {
        unsigned        tri_strip_adj_fix:1;
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index a674a60..7e3d1a0 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3766,20 +3766,25 @@ static void *si_create_vertex_elements(struct 
pipe_context *ctx,
                unsigned char swizzle[4];
if (vbo_index >= SI_NUM_VERTEX_BUFFERS) {
                        FREE(v);
                        return NULL;
                }
if (elements[i].instance_divisor) {
                        v->uses_instance_divisors = true;
                        v->instance_divisors[i] = elements[i].instance_divisor;
+
+                       if (v->instance_divisors[i] == 1)
+                               v->instance_divisor_is_one |= 1u << i;
+                       else
+                               v->instance_divisor_is_fetched |= 1u << i;
                }
if (!used[vbo_index]) {
                        v->first_vb_use_mask |= 1 << i;
                        used[vbo_index] = true;
                }
desc = util_format_description(elements[i].src_format);
                first_non_void = 
util_format_get_first_non_void_channel(elements[i].src_format);
                data_format = si_translate_buffer_dataformat(ctx->screen, desc, 
first_non_void);
@@ -3894,20 +3899,30 @@ static void si_bind_vertex_elements(struct pipe_context 
*ctx, void *state)
        sctx->vertex_elements = v;
        sctx->vertex_buffers_dirty = true;
if (v &&
            (!old ||
             old->count != v->count ||
             old->uses_instance_divisors != v->uses_instance_divisors ||
             v->uses_instance_divisors || /* we don't check which divisors 
changed */
             memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * 
v->count)))
                sctx->do_update_shaders = true;
+
+       if (v && v->instance_divisor_is_fetched) {
+               struct pipe_constant_buffer cb;
+
+               cb.buffer = NULL;
+               cb.user_buffer = v->instance_divisors;
+               cb.buffer_offset = 0;
+               cb.buffer_size = sizeof(uint32_t) * v->count;
+               si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, &cb);
+       }
  }
static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
  {
        struct si_context *sctx = (struct si_context *)ctx;
if (sctx->vertex_elements == state)
                sctx->vertex_elements = NULL;
        FREE(state);
  }
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index c9e0770..ec28aba 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -108,20 +108,22 @@ struct si_vertex_elements
        uint8_t                         fix_fetch[SI_MAX_ATTRIBS];
        uint8_t                         format_size[SI_MAX_ATTRIBS];
        uint8_t                         vertex_buffer_index[SI_MAX_ATTRIBS];
uint8_t count;
        bool                            uses_instance_divisors;
uint16_t first_vb_use_mask;
        /* Vertex buffer descriptor list size aligned for optimal prefetch. */
        uint16_t                        desc_list_byte_size;
+       uint16_t                        instance_divisor_is_one; /* bitmask of 
inputs */
+       uint16_t                        instance_divisor_is_fetched;  /* 
bitmask of inputs */
  };
union si_state {
        struct {
                struct si_state_blend           *blend;
                struct si_state_rasterizer      *rasterizer;
                struct si_state_dsa             *dsa;
                struct si_pm4_state             *poly_offset;
                struct si_pm4_state             *ls;
                struct si_pm4_state             *hs;
@@ -175,20 +177,21 @@ enum {
        SI_GS_RING_ESGS,
SI_RING_GSVS, SI_VS_STREAMOUT_BUF0,
        SI_VS_STREAMOUT_BUF1,
        SI_VS_STREAMOUT_BUF2,
        SI_VS_STREAMOUT_BUF3,
SI_HS_CONST_DEFAULT_TESS_LEVELS,
+       SI_VS_CONST_INSTANCE_DIVISORS,
        SI_VS_CONST_CLIP_PLANES,
        SI_PS_CONST_POLY_STIPPLE,
        SI_PS_CONST_SAMPLE_POSITIONS,
SI_NUM_RW_BUFFERS,
  };
/* Indices into sctx->descriptors, laid out so that gfx and compute pipelines
   * are contiguous:
   *
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 4eb3b75..63cc746 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1180,24 +1180,27 @@ static unsigned si_get_alpha_test_func(struct 
si_context *sctx)
  }
static void si_shader_selector_key_vs(struct si_context *sctx,
                                      struct si_shader_selector *vs,
                                      struct si_shader_key *key,
                                      struct si_vs_prolog_bits *prolog_key)
  {
        if (!sctx->vertex_elements)
                return;
+ prolog_key->instance_divisor_is_one =
+               sctx->vertex_elements->instance_divisor_is_one;
+       prolog_key->instance_divisor_is_fetched =
+               sctx->vertex_elements->instance_divisor_is_fetched;
+
        unsigned count = MIN2(vs->info.num_inputs,
                              sctx->vertex_elements->count);
-       memcpy(prolog_key->instance_divisors,
-              sctx->vertex_elements->instance_divisors, count * 4);
        memcpy(key->mono.vs_fix_fetch, sctx->vertex_elements->fix_fetch, count);
  }
static void si_shader_selector_key_hw_vs(struct si_context *sctx,
                                         struct si_shader_selector *vs,
                                         struct si_shader_key *key)
  {
        struct si_shader_selector *ps = sctx->ps_shader.cso;
key->opt.clip_disable =



--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
_______________________________________________
mesa-dev mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to