Reviewed-by: Marek Olšák <[email protected]>

Marek

On Thu, Nov 3, 2016 at 11:16 AM, Nicolai Hähnle <[email protected]> wrote: > From: Nicolai Hähnle <[email protected]> > > The hardware always treats the alpha channel as unsigned, so add a shader > workaround. This is rare enough that we'll just build a monolithic vertex > shader. > > The SINT case cannot actually happen in OpenGL, but I've included it for > completeness since it's just a mix of the other cases. > --- > src/gallium/drivers/radeonsi/si_shader.c | 54 > ++++++++++++++++++++++--- > src/gallium/drivers/radeonsi/si_shader.h | 11 +++++ > src/gallium/drivers/radeonsi/si_state.c | 14 +++++++ > src/gallium/drivers/radeonsi/si_state.h | 1 + > src/gallium/drivers/radeonsi/si_state_shaders.c | 4 ++ > 5 files changed, 78 insertions(+), 6 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index 28a8b1f..b170eb9 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -362,67 +362,105 @@ static LLVMValueRef get_instance_index_for_fetch( > /* The division must be done before START_INSTANCE is added. 
*/ > if (divisor > 1) > result = LLVMBuildUDiv(gallivm->builder, result, > lp_build_const_int32(gallivm, divisor), ""); > > return LLVMBuildAdd(gallivm->builder, result, > LLVMGetParam(radeon_bld->main_fn, > param_start_instance), ""); > } > > static void declare_input_vs( > - struct si_shader_context *radeon_bld, > + struct si_shader_context *ctx, > unsigned input_index, > const struct tgsi_full_declaration *decl, > LLVMValueRef out[4]) > { > - struct lp_build_context *base = &radeon_bld->soa.bld_base.base; > + struct lp_build_context *base = &ctx->soa.bld_base.base; > struct gallivm_state *gallivm = base->gallivm; > - struct si_shader_context *ctx = > - si_shader_context(&radeon_bld->soa.bld_base); > > unsigned chan; > + unsigned fix_fetch; > > LLVMValueRef t_list_ptr; > LLVMValueRef t_offset; > LLVMValueRef t_list; > LLVMValueRef attribute_offset; > LLVMValueRef buffer_index; > LLVMValueRef args[3]; > LLVMValueRef input; > > /* Load the T list */ > t_list_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_VERTEX_BUFFERS); > > t_offset = lp_build_const_int32(gallivm, input_index); > > t_list = build_indexed_load_const(ctx, t_list_ptr, t_offset); > > /* Build the attribute offset */ > attribute_offset = lp_build_const_int32(gallivm, 0); > > - buffer_index = LLVMGetParam(radeon_bld->main_fn, > + buffer_index = LLVMGetParam(ctx->main_fn, > ctx->param_vertex_index0 + > input_index); > > args[0] = t_list; > args[1] = attribute_offset; > args[2] = buffer_index; > input = lp_build_intrinsic(gallivm->builder, > "llvm.SI.vs.load.input", ctx->v4f32, args, 3, > LLVMReadNoneAttribute); > > /* Break up the vec4 into individual components */ > for (chan = 0; chan < 4; chan++) { > LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); > out[chan] = LLVMBuildExtractElement(gallivm->builder, > input, llvm_chan, ""); > } > + > + fix_fetch = (ctx->shader->key.vs.fix_fetch >> (2 * input_index)) & 3; > + if (fix_fetch) { > + /* The hardware returns an unsigned value; convert it to a > 
+ * signed one. > + */ > + LLVMValueRef tmp = out[3]; > + LLVMValueRef c30 = LLVMConstInt(ctx->i32, 30, 0); > + > + /* First, recover the sign-extended signed integer value. */ > + if (fix_fetch == SI_FIX_FETCH_A2_SSCALED) > + tmp = LLVMBuildFPToUI(gallivm->builder, tmp, > ctx->i32, ""); > + else > + tmp = LLVMBuildBitCast(gallivm->builder, tmp, > ctx->i32, ""); > + > + /* For the integer-like cases, do a natural sign extension. > + * > + * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0 > + * and happen to contain 0, 1, 2, 3 as the two LSBs of the > + * exponent. > + */ > + tmp = LLVMBuildShl(gallivm->builder, tmp, > + fix_fetch == SI_FIX_FETCH_A2_SNORM ? > + LLVMConstInt(ctx->i32, 7, 0) : c30, ""); > + tmp = LLVMBuildAShr(gallivm->builder, tmp, c30, ""); > + > + /* Convert back to the right type. */ > + if (fix_fetch == SI_FIX_FETCH_A2_SNORM) { > + LLVMValueRef clamp; > + LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0); > + tmp = LLVMBuildSIToFP(gallivm->builder, tmp, > ctx->f32, ""); > + clamp = LLVMBuildFCmp(gallivm->builder, LLVMRealULT, > tmp, neg_one, ""); > + tmp = LLVMBuildSelect(gallivm->builder, clamp, > neg_one, tmp, ""); > + } else if (fix_fetch == SI_FIX_FETCH_A2_SSCALED) { > + tmp = LLVMBuildSIToFP(gallivm->builder, tmp, > ctx->f32, ""); > + } > + > + out[3] = tmp; > + } > } > > static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base, > unsigned swizzle) > { > struct si_shader_context *ctx = si_shader_context(bld_base); > > if (swizzle > 0) > return bld_base->uint_bld.zero; > > @@ -8095,25 +8133,29 @@ static void si_fix_num_sgprs(struct si_shader *shader) > int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, > struct si_shader *shader, > struct pipe_debug_callback *debug) > { > struct si_shader_selector *sel = shader->selector; > struct si_shader *mainp = sel->main_shader_part; > int r; > > /* LS, ES, VS are compiled on demand if the main part hasn't been > * compiled for that stage. 
> + * > + * Vertex shaders are compiled on demand when a vertex fetch > + * workaround must be applied. > */ > if (!mainp || > (sel->type == PIPE_SHADER_VERTEX && > (shader->key.vs.as_es != mainp->key.vs.as_es || > - shader->key.vs.as_ls != mainp->key.vs.as_ls)) || > + shader->key.vs.as_ls != mainp->key.vs.as_ls || > + shader->key.vs.fix_fetch)) || > (sel->type == PIPE_SHADER_TESS_EVAL && > shader->key.tes.as_es != mainp->key.tes.as_es) || > (sel->type == PIPE_SHADER_TESS_CTRL && > shader->key.tcs.epilog.inputs_to_copy) || > sel->type == PIPE_SHADER_COMPUTE) { > /* Monolithic shader (compiled as a whole, has many variants, > * may take a long time to compile). > */ > r = si_compile_tgsi_shader(sscreen, tm, shader, true, debug); > if (r) > diff --git a/src/gallium/drivers/radeonsi/si_shader.h > b/src/gallium/drivers/radeonsi/si_shader.h > index d8ab2a4..59e7bfb 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.h > +++ b/src/gallium/drivers/radeonsi/si_shader.h > @@ -226,20 +226,28 @@ enum { > > SI_NUM_PARAMS = SI_PARAM_POS_FIXED_PT + 9, /* +8 for COLOR[0..1] */ > }; > > /* SI-specific system values. */ > enum { > TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT, > TGSI_SEMANTIC_DEFAULT_TESSINNER_SI, > }; > > +/* For VS shader key fix_fetch. */ > +enum { > + SI_FIX_FETCH_NONE = 0, > + SI_FIX_FETCH_A2_SNORM = 1, > + SI_FIX_FETCH_A2_SSCALED = 2, > + SI_FIX_FETCH_A2_SINT = 3, > +}; > + > struct si_shader; > > /* A shader selector is a gallium CSO and contains shader variants and > * binaries for one TGSI program. This can be shared by multiple contexts. 
> */ > struct si_shader_selector { > struct si_screen *screen; > struct util_queue_fence ready; > > /* Should only be used by si_init_shader_selector_async > @@ -393,20 +401,23 @@ union si_shader_part_key { > union si_shader_key { > struct { > struct si_ps_prolog_bits prolog; > struct si_ps_epilog_bits epilog; > } ps; > struct { > struct si_vs_prolog_bits prolog; > struct si_vs_epilog_bits epilog; > unsigned as_es:1; /* export shader */ > unsigned as_ls:1; /* local shader */ > + > + /* One pair of bits for every input: SI_FIX_FETCH_* enums. */ > + uint32_t fix_fetch; > } vs; > struct { > struct si_tcs_epilog_bits epilog; > } tcs; /* tessellation control shader */ > struct { > struct si_vs_epilog_bits epilog; /* same as VS */ > unsigned as_es:1; /* export shader */ > } tes; /* tessellation evaluation shader */ > struct { > struct si_gs_prolog_bits prolog; > diff --git a/src/gallium/drivers/radeonsi/si_state.c > b/src/gallium/drivers/radeonsi/si_state.c > index 642ce79..24c7b10 100644 > --- a/src/gallium/drivers/radeonsi/si_state.c > +++ b/src/gallium/drivers/radeonsi/si_state.c > @@ -3274,20 +3274,34 @@ static void *si_create_vertex_elements(struct > pipe_context *ctx, > data_format = si_translate_buffer_dataformat(ctx->screen, > desc, first_non_void); > num_format = si_translate_buffer_numformat(ctx->screen, desc, > first_non_void); > > v->rsrc_word3[i] = > S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | > > S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | > > S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | > > S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | > S_008F0C_NUM_FORMAT(num_format) | > S_008F0C_DATA_FORMAT(data_format); > v->format_size[i] = desc->block.bits / 8; > + > + /* The hardware always treats the 2-bit alpha channel as > + * unsigned, so a shader workaround is needed. 
> + */ > + if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) { > + if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) { > + v->fix_fetch |= SI_FIX_FETCH_A2_SNORM << (2 * > i); > + } else if (num_format == > V_008F0C_BUF_NUM_FORMAT_SSCALED) { > + v->fix_fetch |= SI_FIX_FETCH_A2_SSCALED << (2 > * i); > + } else if (num_format == > V_008F0C_BUF_NUM_FORMAT_SINT) { > + /* This isn't actually used in OpenGL. */ > + v->fix_fetch |= SI_FIX_FETCH_A2_SINT << (2 * > i); > + } > + } > } > memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * > count); > > return v; > } > > static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) > { > struct si_context *sctx = (struct si_context *)ctx; > struct si_vertex_element *v = (struct si_vertex_element*)state; > diff --git a/src/gallium/drivers/radeonsi/si_state.h > b/src/gallium/drivers/radeonsi/si_state.h > index 3ebf578..c444a69 100644 > --- a/src/gallium/drivers/radeonsi/si_state.h > +++ b/src/gallium/drivers/radeonsi/si_state.h > @@ -92,20 +92,21 @@ struct si_state_dsa { > > struct si_stencil_ref { > struct r600_atom atom; > struct pipe_stencil_ref state; > struct si_dsa_stencil_ref_part dsa_part; > }; > > struct si_vertex_element > { > unsigned count; > + uint32_t fix_fetch; > uint32_t rsrc_word3[SI_MAX_ATTRIBS]; > uint32_t format_size[SI_MAX_ATTRIBS]; > struct pipe_vertex_element elements[SI_MAX_ATTRIBS]; > }; > > union si_state { > struct { > struct si_state_blend *blend; > struct si_state_rasterizer *rasterizer; > struct si_state_dsa *dsa; > diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c > b/src/gallium/drivers/radeonsi/si_state_shaders.c > index 2a41bf1..9e95fea 100644 > --- a/src/gallium/drivers/radeonsi/si_state_shaders.c > +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c > @@ -865,20 +865,24 @@ static inline void si_shader_selector_key(struct > pipe_context *ctx, > memset(key, 0, sizeof(*key)); > > switch (sel->type) { > case PIPE_SHADER_VERTEX: > if 
(sctx->vertex_elements) { > unsigned count = MIN2(sel->info.num_inputs, > sctx->vertex_elements->count); > for (i = 0; i < count; ++i) > key->vs.prolog.instance_divisors[i] = > > sctx->vertex_elements->elements[i].instance_divisor; > + > + key->vs.fix_fetch = > + sctx->vertex_elements->fix_fetch & > + u_bit_consecutive(0, 2 * count); > } > if (sctx->tes_shader.cso) > key->vs.as_ls = 1; > else if (sctx->gs_shader.cso) > key->vs.as_es = 1; > > if (!sctx->gs_shader.cso && sctx->ps_shader.cso && > sctx->ps_shader.cso->info.uses_primid) > key->vs.epilog.export_prim_id = 1; > break; > -- > 2.7.4 > > _______________________________________________ > mesa-dev mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
