From: Marek Olšák <marek.ol...@amd.com> Store fix_fetch as a byte array (one byte per vertex attribute, instead of one nibble per attribute packed into a uint64_t) so that we can add 3-component fallbacks. --- src/gallium/drivers/radeonsi/si_shader.c | 8 +++++-- src/gallium/drivers/radeonsi/si_shader.h | 5 ++--- src/gallium/drivers/radeonsi/si_state.c | 28 ++++++++++++------------- src/gallium/drivers/radeonsi/si_state.h | 2 +- src/gallium/drivers/radeonsi/si_state_shaders.c | 5 ++--- 5 files changed, 25 insertions(+), 23 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index cfff54a..8b9fed9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -359,21 +359,21 @@ static void declare_input_vs( t_list_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_VERTEX_BUFFERS); t_offset = lp_build_const_int32(gallivm, input_index); t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset); vertex_index = LLVMGetParam(ctx->main_fn, ctx->param_vertex_index0 + input_index); - fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) & 0xf; + fix_fetch = ctx->shader->key.mono.vs.fix_fetch[input_index]; /* Do multiple loads for double formats. */ if (fix_fetch == SI_FIX_FETCH_RGB_64_FLOAT) { num_fetches = 3; /* 3 2-dword loads */ fetch_stride = 8; } else if (fix_fetch == SI_FIX_FETCH_RGBA_64_FLOAT) { num_fetches = 2; /* 2 4-dword loads */ fetch_stride = 16; } else { num_fetches = 1; @@ -6263,21 +6263,25 @@ static void si_dump_shader_key(unsigned shader, struct si_shader_key *key, switch (shader) { case PIPE_SHADER_VERTEX: fprintf(f, " part.vs.prolog.instance_divisors = {"); for (i = 0; i < ARRAY_SIZE(key->part.vs.prolog.instance_divisors); i++) fprintf(f, !i ? "%u" : ", %u", key->part.vs.prolog.instance_divisors[i]); fprintf(f, "}\n"); fprintf(f, " part.vs.epilog.export_prim_id = %u\n", key->part.vs.epilog.export_prim_id); fprintf(f, " as_es = %u\n", key->as_es); fprintf(f, " as_ls = %u\n", key->as_ls); - fprintf(f, " mono.vs.fix_fetch = 0x%"PRIx64"\n", key->mono.vs.fix_fetch); + + fprintf(f, " mono.vs.fix_fetch = {"); + for (i = 0; i < SI_MAX_ATTRIBS; i++) + fprintf(f, !i ? 
"%u" : ", %u", key->mono.vs.fix_fetch[i]); + fprintf(f, "}\n"); break; case PIPE_SHADER_TESS_CTRL: fprintf(f, " part.tcs.epilog.prim_mode = %u\n", key->part.tcs.epilog.prim_mode); fprintf(f, " mono.tcs.inputs_to_copy = 0x%"PRIx64"\n", key->mono.tcs.inputs_to_copy); break; case PIPE_SHADER_TESS_EVAL: fprintf(f, " part.tes.epilog.export_prim_id = %u\n", key->part.tes.epilog.export_prim_id); fprintf(f, " as_es = %u\n", key->as_es); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 6398b39..4616190 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -243,21 +243,20 @@ enum { SI_FIX_FETCH_RGBX_32_UNORM, SI_FIX_FETCH_RGBA_32_SNORM, SI_FIX_FETCH_RGBX_32_SNORM, SI_FIX_FETCH_RGBA_32_USCALED, SI_FIX_FETCH_RGBA_32_SSCALED, SI_FIX_FETCH_RGBA_32_FIXED, SI_FIX_FETCH_RGBX_32_FIXED, SI_FIX_FETCH_RG_64_FLOAT, SI_FIX_FETCH_RGB_64_FLOAT, SI_FIX_FETCH_RGBA_64_FLOAT, - SI_FIX_FETCH_RESERVED_15, /* maximum */ }; struct si_shader; /* State of the context creating the shader object. */ struct si_compiler_ctx_state { /* Should only be used by si_init_shader_selector_async and * si_build_shader_variant if thread_index == -1 (non-threaded). */ LLVMTargetMachineRef tm; @@ -438,22 +437,22 @@ struct si_shader_key { /* These two are initially set according to the NEXT_SHADER property, * or guessed if the property doesn't seem correct. */ unsigned as_es:1; /* export shader */ unsigned as_ls:1; /* local shader */ /* Flags for monolithic compilation only. */ union { struct { - /* One nibble for every input: SI_FIX_FETCH_* enums. */ - uint64_t fix_fetch; + /* One byte for every input: SI_FIX_FETCH_* enums. */ + uint8_t fix_fetch[SI_MAX_ATTRIBS]; } vs; struct { uint64_t inputs_to_copy; /* for fixed-func TCS */ } tcs; } mono; /* Optimization flags for asynchronous compilation only. 
*/ union { struct { uint64_t kill_outputs; /* "get_unique_index" bits */ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 4ccca52..d9b9f83 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3392,72 +3392,72 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL; memcpy(swizzle, desc->swizzle, sizeof(swizzle)); v->format_size[i] = desc->block.bits / 8; /* The hardware always treats the 2-bit alpha channel as * unsigned, so a shader workaround is needed. */ if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) { if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) { - v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SNORM << (4 * i); + v->fix_fetch[i] = SI_FIX_FETCH_A2_SNORM; } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) { - v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SSCALED << (4 * i); + v->fix_fetch[i] = SI_FIX_FETCH_A2_SSCALED; } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) { /* This isn't actually used in OpenGL. 
*/ - v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SINT << (4 * i); + v->fix_fetch[i] = SI_FIX_FETCH_A2_SINT; } } else if (channel && channel->type == UTIL_FORMAT_TYPE_FIXED) { if (desc->swizzle[3] == PIPE_SWIZZLE_1) - v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_FIXED << (4 * i); + v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_FIXED; else - v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_FIXED << (4 * i); + v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_FIXED; } else if (channel && channel->size == 32 && !channel->pure_integer) { if (channel->type == UTIL_FORMAT_TYPE_SIGNED) { if (channel->normalized) { if (desc->swizzle[3] == PIPE_SWIZZLE_1) - v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_SNORM << (4 * i); + v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_SNORM; else - v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SNORM << (4 * i); + v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_SNORM; } else { - v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SSCALED << (4 * i); + v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_SSCALED; } } else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) { if (channel->normalized) { if (desc->swizzle[3] == PIPE_SWIZZLE_1) - v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_UNORM << (4 * i); + v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_UNORM; else - v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_UNORM << (4 * i); + v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_UNORM; } else { - v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_USCALED << (4 * i); + v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_USCALED; } } } else if (channel && channel->size == 64 && channel->type == UTIL_FORMAT_TYPE_FLOAT) { switch (desc->nr_channels) { case 1: case 2: - v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RG_64_FLOAT << (4 * i); + v->fix_fetch[i] = SI_FIX_FETCH_RG_64_FLOAT; swizzle[0] = PIPE_SWIZZLE_X; swizzle[1] = PIPE_SWIZZLE_Y; swizzle[2] = desc->nr_channels == 2 ? PIPE_SWIZZLE_Z : PIPE_SWIZZLE_0; swizzle[3] = desc->nr_channels == 2 ? 
PIPE_SWIZZLE_W : PIPE_SWIZZLE_0; break; case 3: - v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGB_64_FLOAT << (4 * i); + v->fix_fetch[i] = SI_FIX_FETCH_RGB_64_FLOAT; swizzle[0] = PIPE_SWIZZLE_X; /* 3 loads */ swizzle[1] = PIPE_SWIZZLE_Y; swizzle[2] = PIPE_SWIZZLE_0; swizzle[3] = PIPE_SWIZZLE_0; break; case 4: - v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_64_FLOAT << (4 * i); + v->fix_fetch[i] = SI_FIX_FETCH_RGBA_64_FLOAT; swizzle[0] = PIPE_SWIZZLE_X; /* 2 loads */ swizzle[1] = PIPE_SWIZZLE_Y; swizzle[2] = PIPE_SWIZZLE_Z; swizzle[3] = PIPE_SWIZZLE_W; break; default: assert(0); } } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 07b7d58..cd44ed1 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -100,21 +100,21 @@ struct si_vertex_element { unsigned count; unsigned first_vb_use_mask; /* Vertex buffer descriptor list size aligned for optimal prefetch. */ unsigned desc_list_byte_size; /* Two bits per attribute indicating the size of each vector component * in bytes if the size 3-workaround must be applied. 
*/ uint32_t fix_size3; - uint64_t fix_fetch; + uint8_t fix_fetch[SI_MAX_ATTRIBS]; uint32_t rsrc_word3[SI_MAX_ATTRIBS]; uint32_t format_size[SI_MAX_ATTRIBS]; struct pipe_vertex_element elements[SI_MAX_ATTRIBS]; }; union si_state { struct { struct si_state_blend *blend; struct si_state_rasterizer *rasterizer; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index bde02f5..9570259 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -968,23 +968,22 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, switch (sel->type) { case PIPE_SHADER_VERTEX: if (sctx->vertex_elements) { unsigned count = MIN2(sel->info.num_inputs, sctx->vertex_elements->count); for (i = 0; i < count; ++i) key->part.vs.prolog.instance_divisors[i] = sctx->vertex_elements->elements[i].instance_divisor; - key->mono.vs.fix_fetch = - sctx->vertex_elements->fix_fetch & - u_bit_consecutive64(0, 4 * count); + memcpy(key->mono.vs.fix_fetch, + sctx->vertex_elements->fix_fetch, count); } if (sctx->tes_shader.cso) key->as_ls = 1; else if (sctx->gs_shader.cso) key->as_es = 1; else { si_shader_selector_key_hw_vs(sctx, sel, key); if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid) key->part.vs.epilog.export_prim_id = 1; -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev