Am 03.04.2016 um 12:24 schrieb Jose Fonseca: > We could unconditionally use these instrinsics, but performance with SSE2 > would suck, as LLVM falls back to calling libm. Would that really work now? Last time I checked, calls out to libm from the jitted code weren't just slow, they simply crashed.
> > lp_test_arit. > --- > src/gallium/auxiliary/gallivm/lp_bld_arit.c | 46 > ++++++++++++++++++++++++----- > 1 file changed, 39 insertions(+), 7 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c > b/src/gallium/auxiliary/gallivm/lp_bld_arit.c > index 01c6ba9..1277743 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c > @@ -1675,13 +1675,13 @@ enum lp_build_round_mode > * result is the even value. That is, rounding 2.5 will be 2.0, and not 3.0. > */ > static inline LLVMValueRef > -lp_build_round_sse41(struct lp_build_context *bld, > - LLVMValueRef a, > - enum lp_build_round_mode mode) > +lp_build_nearest_sse41(struct lp_build_context *bld, > + LLVMValueRef a) > { > LLVMBuilderRef builder = bld->gallivm->builder; > const struct lp_type type = bld->type; > LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); > + LLVMValueRef mode = LLVMConstNull(i32t); > const char *intrinsic; > LLVMValueRef res; > > @@ -1714,7 +1714,7 @@ lp_build_round_sse41(struct lp_build_context *bld, > > args[0] = undef; > args[1] = LLVMBuildInsertElement(builder, undef, a, index0, ""); > - args[2] = LLVMConstInt(i32t, mode, 0); > + args[2] = mode; > > res = lp_build_intrinsic(builder, intrinsic, > vec_type, args, Elements(args), 0); > @@ -1754,7 +1754,7 @@ lp_build_round_sse41(struct lp_build_context *bld, > > res = lp_build_intrinsic_binary(builder, intrinsic, > bld->vec_type, a, > - LLVMConstInt(i32t, mode, 0)); > + mode); > } > > return res; > @@ -1856,8 +1856,40 @@ lp_build_round_arch(struct lp_build_context *bld, > LLVMValueRef a, > enum lp_build_round_mode mode) > { > - if (util_cpu_caps.has_sse4_1) > - return lp_build_round_sse41(bld, a, mode); > + if (util_cpu_caps.has_sse4_1) { > + LLVMBuilderRef builder = bld->gallivm->builder; > + const struct lp_type type = bld->type; > + const char *intrinsic_root; > + char intrinsic[32]; > + > + assert(type.floating); > + assert(lp_check_value(type, a)); > + (void)type; > + > + switch (mode) { > + case LP_BUILD_ROUND_NEAREST: > + if (HAVE_LLVM >= 0x0304) { > + intrinsic_root = "llvm.round"; > + } else { > + return lp_build_nearest_sse41(bld, a); > + } > + break; > + case LP_BUILD_ROUND_FLOOR: > + intrinsic_root = "llvm.floor"; > + break; > + case LP_BUILD_ROUND_CEIL: > + intrinsic_root = "llvm.ceil"; > + break; > + case LP_BUILD_ROUND_TRUNCATE: > + intrinsic_root = "llvm.trunc"; > + break; > + } > + > + util_snprintf(intrinsic, sizeof intrinsic, "%s.v%uf%u", > + intrinsic_root, type.length, type.width); > + > + return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a); > + } > else /* (util_cpu_caps.has_altivec) */ > return lp_build_round_altivec(bld, a, mode); > } > Looks good to me. I'd guess it would work for altivec too, though it's better to let someone handle that who can test it :-). Those were all new in llvm 3.3, too bad they forgot the round... Reviewed-by: Roland Scheidegger <srol...@vmware.com> _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev