From: Marek Olšák <marek.ol...@amd.com>

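Store fix_fetch as one byte per vertex input (a uint8_t array) instead of
one nibble packed into a uint64_t, so that we can add 3-component fallbacks.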
---
 src/gallium/drivers/radeonsi/si_shader.c        |  8 +++++--
 src/gallium/drivers/radeonsi/si_shader.h        |  5 ++---
 src/gallium/drivers/radeonsi/si_state.c         | 28 ++++++++++++-------------
 src/gallium/drivers/radeonsi/si_state.h         |  2 +-
 src/gallium/drivers/radeonsi/si_state_shaders.c |  5 ++---
 5 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index cfff54a..8b9fed9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -359,21 +359,21 @@ static void declare_input_vs(
        t_list_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_VERTEX_BUFFERS);
 
        t_offset = lp_build_const_int32(gallivm, input_index);
 
        t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
 
        vertex_index = LLVMGetParam(ctx->main_fn,
                                    ctx->param_vertex_index0 +
                                    input_index);
 
-       fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) & 
0xf;
+       fix_fetch = ctx->shader->key.mono.vs.fix_fetch[input_index];
 
        /* Do multiple loads for double formats. */
        if (fix_fetch == SI_FIX_FETCH_RGB_64_FLOAT) {
                num_fetches = 3; /* 3 2-dword loads */
                fetch_stride = 8;
        } else if (fix_fetch == SI_FIX_FETCH_RGBA_64_FLOAT) {
                num_fetches = 2; /* 2 4-dword loads */
                fetch_stride = 16;
        } else {
                num_fetches = 1;
@@ -6263,21 +6263,25 @@ static void si_dump_shader_key(unsigned shader, struct si_shader_key *key,
        switch (shader) {
        case PIPE_SHADER_VERTEX:
                fprintf(f, "  part.vs.prolog.instance_divisors = {");
                for (i = 0; i < 
ARRAY_SIZE(key->part.vs.prolog.instance_divisors); i++)
                        fprintf(f, !i ? "%u" : ", %u",
                                key->part.vs.prolog.instance_divisors[i]);
                fprintf(f, "}\n");
                fprintf(f, "  part.vs.epilog.export_prim_id = %u\n", 
key->part.vs.epilog.export_prim_id);
                fprintf(f, "  as_es = %u\n", key->as_es);
                fprintf(f, "  as_ls = %u\n", key->as_ls);
-               fprintf(f, "  mono.vs.fix_fetch = 0x%"PRIx64"\n", 
key->mono.vs.fix_fetch);
+
+               fprintf(f, "  mono.vs.fix_fetch = {");
+               for (i = 0; i < SI_MAX_ATTRIBS; i++)
+                       fprintf(f, !i ? "%u" : ", %u", 
key->mono.vs.fix_fetch[i]);
+               fprintf(f, "}\n");
                break;
 
        case PIPE_SHADER_TESS_CTRL:
                fprintf(f, "  part.tcs.epilog.prim_mode = %u\n", 
key->part.tcs.epilog.prim_mode);
                fprintf(f, "  mono.tcs.inputs_to_copy = 0x%"PRIx64"\n", 
key->mono.tcs.inputs_to_copy);
                break;
 
        case PIPE_SHADER_TESS_EVAL:
                fprintf(f, "  part.tes.epilog.export_prim_id = %u\n", 
key->part.tes.epilog.export_prim_id);
                fprintf(f, "  as_es = %u\n", key->as_es);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 6398b39..4616190 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -243,21 +243,20 @@ enum {
        SI_FIX_FETCH_RGBX_32_UNORM,
        SI_FIX_FETCH_RGBA_32_SNORM,
        SI_FIX_FETCH_RGBX_32_SNORM,
        SI_FIX_FETCH_RGBA_32_USCALED,
        SI_FIX_FETCH_RGBA_32_SSCALED,
        SI_FIX_FETCH_RGBA_32_FIXED,
        SI_FIX_FETCH_RGBX_32_FIXED,
        SI_FIX_FETCH_RG_64_FLOAT,
        SI_FIX_FETCH_RGB_64_FLOAT,
        SI_FIX_FETCH_RGBA_64_FLOAT,
-       SI_FIX_FETCH_RESERVED_15, /* maximum */
 };
 
 struct si_shader;
 
 /* State of the context creating the shader object. */
 struct si_compiler_ctx_state {
        /* Should only be used by si_init_shader_selector_async and
         * si_build_shader_variant if thread_index == -1 (non-threaded). */
        LLVMTargetMachineRef            tm;
 
@@ -438,22 +437,22 @@ struct si_shader_key {
 
        /* These two are initially set according to the NEXT_SHADER property,
         * or guessed if the property doesn't seem correct.
         */
        unsigned as_es:1; /* export shader */
        unsigned as_ls:1; /* local shader */
 
        /* Flags for monolithic compilation only. */
        union {
                struct {
-                       /* One nibble for every input: SI_FIX_FETCH_* enums. */
-                       uint64_t        fix_fetch;
+                       /* One byte for every input: SI_FIX_FETCH_* enums. */
+                       uint8_t         fix_fetch[SI_MAX_ATTRIBS];
                } vs;
                struct {
                        uint64_t        inputs_to_copy; /* for fixed-func TCS */
                } tcs;
        } mono;
 
        /* Optimization flags for asynchronous compilation only. */
        union {
                struct {
                        uint64_t        kill_outputs; /* "get_unique_index" 
bits */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 4ccca52..d9b9f83 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3392,72 +3392,72 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
                channel = first_non_void >= 0 ? &desc->channel[first_non_void] 
: NULL;
                memcpy(swizzle, desc->swizzle, sizeof(swizzle));
 
                v->format_size[i] = desc->block.bits / 8;
 
                /* The hardware always treats the 2-bit alpha channel as
                 * unsigned, so a shader workaround is needed.
                 */
                if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) {
                        if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
-                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SNORM 
<< (4 * i);
+                               v->fix_fetch[i] = SI_FIX_FETCH_A2_SNORM;
                        } else if (num_format == 
V_008F0C_BUF_NUM_FORMAT_SSCALED) {
-                               v->fix_fetch |= 
(uint64_t)SI_FIX_FETCH_A2_SSCALED << (4 * i);
+                               v->fix_fetch[i] = SI_FIX_FETCH_A2_SSCALED;
                        } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
                                /* This isn't actually used in OpenGL. */
-                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SINT 
<< (4 * i);
+                               v->fix_fetch[i] = SI_FIX_FETCH_A2_SINT;
                        }
                } else if (channel && channel->type == UTIL_FORMAT_TYPE_FIXED) {
                        if (desc->swizzle[3] == PIPE_SWIZZLE_1)
-                               v->fix_fetch |= 
(uint64_t)SI_FIX_FETCH_RGBX_32_FIXED << (4 * i);
+                               v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_FIXED;
                        else
-                               v->fix_fetch |= 
(uint64_t)SI_FIX_FETCH_RGBA_32_FIXED << (4 * i);
+                               v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_FIXED;
                } else if (channel && channel->size == 32 && 
!channel->pure_integer) {
                        if (channel->type == UTIL_FORMAT_TYPE_SIGNED) {
                                if (channel->normalized) {
                                        if (desc->swizzle[3] == PIPE_SWIZZLE_1)
-                                               v->fix_fetch |= 
(uint64_t)SI_FIX_FETCH_RGBX_32_SNORM << (4 * i);
+                                               v->fix_fetch[i] = 
SI_FIX_FETCH_RGBX_32_SNORM;
                                        else
-                                               v->fix_fetch |= 
(uint64_t)SI_FIX_FETCH_RGBA_32_SNORM << (4 * i);
+                                               v->fix_fetch[i] = 
SI_FIX_FETCH_RGBA_32_SNORM;
                                } else {
-                                       v->fix_fetch |= 
(uint64_t)SI_FIX_FETCH_RGBA_32_SSCALED << (4 * i);
+                                       v->fix_fetch[i] = 
SI_FIX_FETCH_RGBA_32_SSCALED;
                                }
                        } else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) {
                                if (channel->normalized) {
                                        if (desc->swizzle[3] == PIPE_SWIZZLE_1)
-                                               v->fix_fetch |= 
(uint64_t)SI_FIX_FETCH_RGBX_32_UNORM << (4 * i);
+                                               v->fix_fetch[i] = 
SI_FIX_FETCH_RGBX_32_UNORM;
                                        else
-                                               v->fix_fetch |= 
(uint64_t)SI_FIX_FETCH_RGBA_32_UNORM << (4 * i);
+                                               v->fix_fetch[i] = 
SI_FIX_FETCH_RGBA_32_UNORM;
                                } else {
-                                       v->fix_fetch |= 
(uint64_t)SI_FIX_FETCH_RGBA_32_USCALED << (4 * i);
+                                       v->fix_fetch[i] = 
SI_FIX_FETCH_RGBA_32_USCALED;
                                }
                        }
                } else if (channel && channel->size == 64 &&
                           channel->type == UTIL_FORMAT_TYPE_FLOAT) {
                        switch (desc->nr_channels) {
                        case 1:
                        case 2:
-                               v->fix_fetch |= 
(uint64_t)SI_FIX_FETCH_RG_64_FLOAT << (4 * i);
+                               v->fix_fetch[i] = SI_FIX_FETCH_RG_64_FLOAT;
                                swizzle[0] = PIPE_SWIZZLE_X;
                                swizzle[1] = PIPE_SWIZZLE_Y;
                                swizzle[2] = desc->nr_channels == 2 ? 
PIPE_SWIZZLE_Z : PIPE_SWIZZLE_0;
                                swizzle[3] = desc->nr_channels == 2 ? 
PIPE_SWIZZLE_W : PIPE_SWIZZLE_0;
                                break;
                        case 3:
-                               v->fix_fetch |= 
(uint64_t)SI_FIX_FETCH_RGB_64_FLOAT << (4 * i);
+                               v->fix_fetch[i] = SI_FIX_FETCH_RGB_64_FLOAT;
                                swizzle[0] = PIPE_SWIZZLE_X; /* 3 loads */
                                swizzle[1] = PIPE_SWIZZLE_Y;
                                swizzle[2] = PIPE_SWIZZLE_0;
                                swizzle[3] = PIPE_SWIZZLE_0;
                                break;
                        case 4:
-                               v->fix_fetch |= 
(uint64_t)SI_FIX_FETCH_RGBA_64_FLOAT << (4 * i);
+                               v->fix_fetch[i] = SI_FIX_FETCH_RGBA_64_FLOAT;
                                swizzle[0] = PIPE_SWIZZLE_X; /* 2 loads */
                                swizzle[1] = PIPE_SWIZZLE_Y;
                                swizzle[2] = PIPE_SWIZZLE_Z;
                                swizzle[3] = PIPE_SWIZZLE_W;
                                break;
                        default:
                                assert(0);
                        }
                }
 
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 07b7d58..cd44ed1 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -100,21 +100,21 @@ struct si_vertex_element
 {
        unsigned                        count;
        unsigned                        first_vb_use_mask;
        /* Vertex buffer descriptor list size aligned for optimal prefetch. */
        unsigned                        desc_list_byte_size;
 
        /* Two bits per attribute indicating the size of each vector component
         * in bytes if the size 3-workaround must be applied.
         */
        uint32_t                        fix_size3;
-       uint64_t                        fix_fetch;
+       uint8_t                         fix_fetch[SI_MAX_ATTRIBS];
 
        uint32_t                        rsrc_word3[SI_MAX_ATTRIBS];
        uint32_t                        format_size[SI_MAX_ATTRIBS];
        struct pipe_vertex_element      elements[SI_MAX_ATTRIBS];
 };
 
 union si_state {
        struct {
                struct si_state_blend           *blend;
                struct si_state_rasterizer      *rasterizer;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index bde02f5..9570259 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -968,23 +968,22 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 
        switch (sel->type) {
        case PIPE_SHADER_VERTEX:
                if (sctx->vertex_elements) {
                        unsigned count = MIN2(sel->info.num_inputs,
                                              sctx->vertex_elements->count);
                        for (i = 0; i < count; ++i)
                                key->part.vs.prolog.instance_divisors[i] =
                                        
sctx->vertex_elements->elements[i].instance_divisor;
 
-                       key->mono.vs.fix_fetch =
-                               sctx->vertex_elements->fix_fetch &
-                               u_bit_consecutive64(0, 4 * count);
+                       memcpy(key->mono.vs.fix_fetch,
+                              sctx->vertex_elements->fix_fetch, count);
                }
                if (sctx->tes_shader.cso)
                        key->as_ls = 1;
                else if (sctx->gs_shader.cso)
                        key->as_es = 1;
                else {
                        si_shader_selector_key_hw_vs(sctx, sel, key);
 
                        if (sctx->ps_shader.cso && 
sctx->ps_shader.cso->info.uses_primid)
                                key->part.vs.epilog.export_prim_id = 1;
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to