From: Marek Olšák <marek.ol...@amd.com>

---
 src/amd/common/ac_llvm_build.c | 44 ++++++++++++++++++++++++++++++++++++++++++
 src/amd/common/ac_llvm_build.h | 13 +++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index ab0ba09..793ff81 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -571,20 +571,64 @@ ac_build_fdiv(struct ac_llvm_context *ctx,
 	 */
 	LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, ctx->f32_1, den, "");
 	LLVMValueRef ret = LLVMBuildFMul(ctx->builder, num, rcp, "");
 
 	/* Use v_rcp_f32 instead of precise division. */
 	if (!LLVMIsConstant(ret))
 		LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
 	return ret;
 }
 
+/* See fast_idiv_by_const.h. */
+/* Set: increment = util_fast_udiv_info::increment ? multiplier : 0; */
+LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
+				LLVMValueRef num,
+				LLVMValueRef multiplier,
+				LLVMValueRef pre_shift,
+				LLVMValueRef post_shift,
+				LLVMValueRef increment)
+{
+	LLVMBuilderRef builder = ctx->builder;
+
+	num = LLVMBuildLShr(builder, num, pre_shift, "");
+	num = LLVMBuildMul(builder,
+			   LLVMBuildZExt(builder, num, ctx->i64, ""),
+			   LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
+	num = LLVMBuildAdd(builder, num,
+			   LLVMBuildZExt(builder, increment, ctx->i64, ""), "");
+	num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
+	num = LLVMBuildTrunc(builder, num, ctx->i32, "");
+	return LLVMBuildLShr(builder, num, post_shift, "");
+}
+
+/* See fast_idiv_by_const.h. */
+/* If num != UINT_MAX, this more efficient version can be used. */
+/* Set: increment = util_fast_udiv_info::increment; */
+LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
+				    LLVMValueRef num,
+				    LLVMValueRef multiplier,
+				    LLVMValueRef pre_shift,
+				    LLVMValueRef post_shift,
+				    LLVMValueRef increment)
+{
+	LLVMBuilderRef builder = ctx->builder;
+
+	num = LLVMBuildLShr(builder, num, pre_shift, "");
+	num = LLVMBuildNUWAdd(builder, num, increment, "");
+	num = LLVMBuildMul(builder,
+			   LLVMBuildZExt(builder, num, ctx->i64, ""),
+			   LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
+	num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
+	num = LLVMBuildTrunc(builder, num, ctx->i32, "");
+	return LLVMBuildLShr(builder, num, post_shift, "");
+}
+
 /* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
  * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
  * already multiplied by two. id is the cube face number.
  */
 struct cube_selection_coords {
 	LLVMValueRef stc[2];
 	LLVMValueRef ma;
 	LLVMValueRef id;
 };
 
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 0df9234..722faad 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -172,20 +172,33 @@ ac_build_gather_values(struct ac_llvm_context *ctx,
 		       unsigned value_count);
 LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
 				     LLVMValueRef value,
 				     unsigned num_channels);
 
 LLVMValueRef
 ac_build_fdiv(struct ac_llvm_context *ctx,
 	      LLVMValueRef num,
 	      LLVMValueRef den);
 
+LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
+				LLVMValueRef num,
+				LLVMValueRef multiplier,
+				LLVMValueRef pre_shift,
+				LLVMValueRef post_shift,
+				LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
+				    LLVMValueRef num,
+				    LLVMValueRef multiplier,
+				    LLVMValueRef pre_shift,
+				    LLVMValueRef post_shift,
+				    LLVMValueRef increment);
+
 void
 ac_prepare_cube_coords(struct ac_llvm_context *ctx,
 		       bool is_deriv, bool is_array, bool is_lod,
 		       LLVMValueRef *coords_arg,
 		       LLVMValueRef *derivs_arg);
 
 
 LLVMValueRef
 ac_build_fs_interp(struct ac_llvm_context *ctx,
 		   LLVMValueRef llvm_chan,
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev