Re: [Mesa-dev] [PATCH] llvmpipe: fix snorm blending
Am 20.11.2017 um 14:58 schrieb Jose Fonseca: > On 18/11/17 05:34, srol...@vmware.com wrote: >> From: Roland Scheidegger>> >> The blend math gets a bit funky due to inverse blend factors being >> in range [0,2] rather than [-1,1], our normalized math can't really >> cover this. >> src_alpha_saturate blend factor has a similar problem too. >> (Note that piglit fbo-blending-formats test is mostly useless for >> anything but unorm formats, since not just all src/dst values are >> between [0,1], but the tests are crafted in a way that the results >> are between [0,1] too.) >> >> v2: some formatting fixes, and fix a fairly obscure (to debug) >> issue with alpha-only formats (not related to snorm at all), where >> blend optimization would think it could simplify the blend equation >> if the blend factors were complementary, however was using the >> completely unrelated rgb blend factors instead of the alpha ones... >> --- >> src/gallium/auxiliary/gallivm/lp_bld_arit.c | 50 - >> src/gallium/auxiliary/gallivm/lp_bld_arit.h | 7 ++ >> src/gallium/drivers/llvmpipe/lp_bld_blend.c | 130 >> ++-- >> src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 53 ++ >> 4 files changed, 187 insertions(+), 53 deletions(-) >> >> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c >> b/src/gallium/auxiliary/gallivm/lp_bld_arit.c >> index a1edd34..321c6e4 100644 >> --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c >> +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c >> @@ -541,38 +541,38 @@ lp_build_add(struct lp_build_context *bld, >> assert(lp_check_value(type, a)); >> assert(lp_check_value(type, b)); >> - if(a == bld->zero) >> + if (a == bld->zero) >> return b; >> - if(b == bld->zero) >> + if (b == bld->zero) >> return a; >> - if(a == bld->undef || b == bld->undef) >> + if (a == bld->undef || b == bld->undef) >> return bld->undef; >> - if(bld->type.norm) { >> + if (type.norm) { >> const char *intrinsic = NULL; >> - if(a == bld->one || b == bld->one) >> + if (!type.sign && (a == bld->one || b == bld->one)) >> return bld->one; >> if (!type.floating && !type.fixed) { >> if (type.width * type.length == 128) { >> - if(util_cpu_caps.has_sse2) { >> - if(type.width == 8) >> + if (util_cpu_caps.has_sse2) { >> + if (type.width == 8) >> intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : >> "llvm.x86.sse2.paddus.b"; >> - if(type.width == 16) >> + if (type.width == 16) >> intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : >> "llvm.x86.sse2.paddus.w"; >> } else if (util_cpu_caps.has_altivec) { >> - if(type.width == 8) >> + if (type.width == 8) >> intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" >> : "llvm.ppc.altivec.vaddubs"; >> - if(type.width == 16) >> + if (type.width == 16) >> intrinsic = type.sign ? "llvm.ppc.altivec.vaddshs" >> : "llvm.ppc.altivec.vadduhs"; >> } >> } >> if (type.width * type.length == 256) { >> - if(util_cpu_caps.has_avx2) { >> - if(type.width == 8) >> + if (util_cpu_caps.has_avx2) { >> + if (type.width == 8) >> intrinsic = type.sign ? "llvm.x86.avx2.padds.b" : >> "llvm.x86.avx2.paddus.b"; >> - if(type.width == 16) >> + if (type.width == 16) >> intrinsic = type.sign ? "llvm.x86.avx2.padds.w" : >> "llvm.x86.avx2.paddus.w"; >> } >> } >> @@ -842,38 +842,38 @@ lp_build_sub(struct lp_build_context *bld, >> assert(lp_check_value(type, a)); >> assert(lp_check_value(type, b)); >> - if(b == bld->zero) >> + if (b == bld->zero) >> return a; >> - if(a == bld->undef || b == bld->undef) >> + if (a == bld->undef || b == bld->undef) >> return bld->undef; >> - if(a == b) >> + if (a == b) >> return bld->zero; >> - if(bld->type.norm) { >> + if (type.norm) { >> const char *intrinsic = NULL; >> - if(b == bld->one) >> + if (!type.sign && b == bld->one) >> return bld->zero; >> if (!type.floating && !type.fixed) { >> if (type.width * type.length == 128) { >> if (util_cpu_caps.has_sse2) { >> - if(type.width == 8) >> + if (type.width == 8) >> intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : >> "llvm.x86.sse2.psubus.b"; >> - if(type.width == 16) >> + if (type.width == 16) >> intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : >> "llvm.x86.sse2.psubus.w"; >> } else if (util_cpu_caps.has_altivec) { >> - if(type.width == 8) >> + if (type.width
Re: [Mesa-dev] [PATCH] llvmpipe: fix snorm blending
On 18/11/17 05:34, srol...@vmware.com wrote: From: Roland ScheideggerThe blend math gets a bit funky due to inverse blend factors being in range [0,2] rather than [-1,1], our normalized math can't really cover this. src_alpha_saturate blend factor has a similar problem too. (Note that piglit fbo-blending-formats test is mostly useless for anything but unorm formats, since not just all src/dst values are between [0,1], but the tests are crafted in a way that the results are between [0,1] too.) v2: some formatting fixes, and fix a fairly obscure (to debug) issue with alpha-only formats (not related to snorm at all), where blend optimization would think it could simplify the blend equation if the blend factors were complementary, however was using the completely unrelated rgb blend factors instead of the alpha ones... --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 50 - src/gallium/auxiliary/gallivm/lp_bld_arit.h | 7 ++ src/gallium/drivers/llvmpipe/lp_bld_blend.c | 130 ++-- src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 53 ++ 4 files changed, 187 insertions(+), 53 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index a1edd34..321c6e4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -541,38 +541,38 @@ lp_build_add(struct lp_build_context *bld, assert(lp_check_value(type, a)); assert(lp_check_value(type, b)); - if(a == bld->zero) + if (a == bld->zero) return b; - if(b == bld->zero) + if (b == bld->zero) return a; - if(a == bld->undef || b == bld->undef) + if (a == bld->undef || b == bld->undef) return bld->undef; - if(bld->type.norm) { + if (type.norm) { const char *intrinsic = NULL; - if(a == bld->one || b == bld->one) + if (!type.sign && (a == bld->one || b == bld->one)) return bld->one; if (!type.floating && !type.fixed) { if (type.width * type.length == 128) { -if(util_cpu_caps.has_sse2) { - if(type.width == 8) +if (util_cpu_caps.has_sse2) { + if (type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w"; } else if (util_cpu_caps.has_altivec) { - if(type.width == 8) + if (type.width == 8) intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.ppc.altivec.vaddshs" : "llvm.ppc.altivec.vadduhs"; } } if (type.width * type.length == 256) { -if(util_cpu_caps.has_avx2) { - if(type.width == 8) +if (util_cpu_caps.has_avx2) { + if (type.width == 8) intrinsic = type.sign ? "llvm.x86.avx2.padds.b" : "llvm.x86.avx2.paddus.b"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.x86.avx2.padds.w" : "llvm.x86.avx2.paddus.w"; } } @@ -842,38 +842,38 @@ lp_build_sub(struct lp_build_context *bld, assert(lp_check_value(type, a)); assert(lp_check_value(type, b)); - if(b == bld->zero) + if (b == bld->zero) return a; - if(a == bld->undef || b == bld->undef) + if (a == bld->undef || b == bld->undef) return bld->undef; - if(a == b) + if (a == b) return bld->zero; - if(bld->type.norm) { + if (type.norm) { const char *intrinsic = NULL; - if(b == bld->one) + if (!type.sign && b == bld->one) return bld->zero; if (!type.floating && !type.fixed) { if (type.width * type.length == 128) { if (util_cpu_caps.has_sse2) { - if(type.width == 8) + if (type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w"; } else if (util_cpu_caps.has_altivec) { - if(type.width == 8) + if (type.width == 8) intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.ppc.altivec.vsubshs" : "llvm.ppc.altivec.vsubuhs"; } } if (type.width * type.length == 256) {
[Mesa-dev] [PATCH] llvmpipe: fix snorm blending
From: Roland ScheideggerThe blend math gets a bit funky due to inverse blend factors being in range [0,2] rather than [-1,1], our normalized math can't really cover this. src_alpha_saturate blend factor has a similar problem too. (Note that piglit fbo-blending-formats test is mostly useless for anything but unorm formats, since not just all src/dst values are between [0,1], but the tests are crafted in a way that the results are between [0,1] too.) v2: some formatting fixes, and fix a fairly obscure (to debug) issue with alpha-only formats (not related to snorm at all), where blend optimization would think it could simplify the blend equation if the blend factors were complementary, however was using the completely unrelated rgb blend factors instead of the alpha ones... --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 50 - src/gallium/auxiliary/gallivm/lp_bld_arit.h | 7 ++ src/gallium/drivers/llvmpipe/lp_bld_blend.c | 130 ++-- src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 53 ++ 4 files changed, 187 insertions(+), 53 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index a1edd34..321c6e4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -541,38 +541,38 @@ lp_build_add(struct lp_build_context *bld, assert(lp_check_value(type, a)); assert(lp_check_value(type, b)); - if(a == bld->zero) + if (a == bld->zero) return b; - if(b == bld->zero) + if (b == bld->zero) return a; - if(a == bld->undef || b == bld->undef) + if (a == bld->undef || b == bld->undef) return bld->undef; - if(bld->type.norm) { + if (type.norm) { const char *intrinsic = NULL; - if(a == bld->one || b == bld->one) + if (!type.sign && (a == bld->one || b == bld->one)) return bld->one; if (!type.floating && !type.fixed) { if (type.width * type.length == 128) { -if(util_cpu_caps.has_sse2) { - if(type.width == 8) +if (util_cpu_caps.has_sse2) { + if (type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w"; } else if (util_cpu_caps.has_altivec) { - if(type.width == 8) + if (type.width == 8) intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.ppc.altivec.vaddshs" : "llvm.ppc.altivec.vadduhs"; } } if (type.width * type.length == 256) { -if(util_cpu_caps.has_avx2) { - if(type.width == 8) +if (util_cpu_caps.has_avx2) { + if (type.width == 8) intrinsic = type.sign ? "llvm.x86.avx2.padds.b" : "llvm.x86.avx2.paddus.b"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.x86.avx2.padds.w" : "llvm.x86.avx2.paddus.w"; } } @@ -842,38 +842,38 @@ lp_build_sub(struct lp_build_context *bld, assert(lp_check_value(type, a)); assert(lp_check_value(type, b)); - if(b == bld->zero) + if (b == bld->zero) return a; - if(a == bld->undef || b == bld->undef) + if (a == bld->undef || b == bld->undef) return bld->undef; - if(a == b) + if (a == b) return bld->zero; - if(bld->type.norm) { + if (type.norm) { const char *intrinsic = NULL; - if(b == bld->one) + if (!type.sign && b == bld->one) return bld->zero; if (!type.floating && !type.fixed) { if (type.width * type.length == 128) { if (util_cpu_caps.has_sse2) { - if(type.width == 8) + if (type.width == 8) intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w"; } else if (util_cpu_caps.has_altivec) { - if(type.width == 8) + if (type.width == 8) intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs"; - if(type.width == 16) + if (type.width == 16) intrinsic = type.sign ? "llvm.ppc.altivec.vsubshs" : "llvm.ppc.altivec.vsubuhs"; } } if (type.width * type.length == 256) { if (util_cpu_caps.has_avx2) { - if(type.width == 8) + if (type.width
Re: [Mesa-dev] [PATCH] llvmpipe: fix snorm blending
Am 17.11.2017 um 14:20 schrieb Emil Velikov: > Hi Roland, > > Just a small fly-by idea: > > On 17 November 2017 at 07:00,wrote: > >> - if(bld->type.norm) { >> + if(type.norm) { >>const char *intrinsic = NULL; >> >> - if(a == bld->one || b == bld->one) >> + if(!type.sign && (a == bld->one || b == bld->one)) >> return bld->one; >> >>if (!type.floating && !type.fixed) { >> @@ -849,10 +849,10 @@ lp_build_sub(struct lp_build_context *bld, >> if(a == b) >>return bld->zero; >> >> - if(bld->type.norm) { >> + if(type.norm) { >>const char *intrinsic = NULL; >> >> - if(b == bld->one) >> + if(!type.sign && b == bld->one) >> return bld->zero; >> > One could fix the coding style (aka missing space between if and > opening bracket) in the above code, while they're here? > Ah yes, I'll fix that. (There's so many of these in that file that I didn't even notice it was wrong...) Roland ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] llvmpipe: fix snorm blending
Hi Roland, Just a small fly-by idea: On 17 November 2017 at 07:00,wrote: > - if(bld->type.norm) { > + if(type.norm) { >const char *intrinsic = NULL; > > - if(a == bld->one || b == bld->one) > + if(!type.sign && (a == bld->one || b == bld->one)) > return bld->one; > >if (!type.floating && !type.fixed) { > @@ -849,10 +849,10 @@ lp_build_sub(struct lp_build_context *bld, > if(a == b) >return bld->zero; > > - if(bld->type.norm) { > + if(type.norm) { >const char *intrinsic = NULL; > > - if(b == bld->one) > + if(!type.sign && b == bld->one) > return bld->zero; > One could fix the coding style (aka missing space between if and opening bracket) in the above code, while they're here? -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] llvmpipe: fix snorm blending
From: Roland ScheideggerThe blend math gets a bit funky due to inverse blend factors being in range [0,2] rather than [-1,1], our normalized math can't really cover this. src_alpha_saturate blend factor has a similar problem too. (Note that piglit fbo-blending-formats test is mostly useless for anything but unorm formats, since not just all src/dst values are between [0,1], but the tests are crafted in a way that the results are between [0,1] too.) --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 10 +- src/gallium/auxiliary/gallivm/lp_bld_arit.h | 7 ++ src/gallium/drivers/llvmpipe/lp_bld_blend.c | 120 +++- src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 28 -- 4 files changed, 149 insertions(+), 16 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index a1edd34..628dedd 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -548,10 +548,10 @@ lp_build_add(struct lp_build_context *bld, if(a == bld->undef || b == bld->undef) return bld->undef; - if(bld->type.norm) { + if(type.norm) { const char *intrinsic = NULL; - if(a == bld->one || b == bld->one) + if(!type.sign && (a == bld->one || b == bld->one)) return bld->one; if (!type.floating && !type.fixed) { @@ -849,10 +849,10 @@ lp_build_sub(struct lp_build_context *bld, if(a == b) return bld->zero; - if(bld->type.norm) { + if(type.norm) { const char *intrinsic = NULL; - if(b == bld->one) + if(!type.sign && b == bld->one) return bld->zero; if (!type.floating && !type.fixed) { @@ -963,7 +963,7 @@ lp_build_sub(struct lp_build_context *bld, * @sa Michael Herf, The "double blend trick", May 2000, * http://www.stereopsis.com/doubleblend.html */ -static LLVMValueRef +LLVMValueRef lp_build_mul_norm(struct gallivm_state *gallivm, struct lp_type wide_type, LLVMValueRef a, LLVMValueRef b) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 2a4137a..f5b2800 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -71,6 +71,13 @@ lp_build_sub(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); + +LLVMValueRef +lp_build_mul_norm(struct gallivm_state *gallivm, + struct lp_type wide_type, + LLVMValueRef a, + LLVMValueRef b); + LLVMValueRef lp_build_mul(struct lp_build_context *bld, LLVMValueRef a, diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend.c index 1feb415..bd886dc 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.c @@ -35,6 +35,7 @@ #include "gallivm/lp_bld_swizzle.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_debug.h" +#include "gallivm/lp_bld_pack.h" #include "lp_bld_blend.h" @@ -86,6 +87,56 @@ lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor) /** + * Whether this is a inverse blend factor + */ +static inline boolean +is_inverse_factor(unsigned factor) +{ + return factor > 0x11; +} + + +/** + * Calculates the (expanded to wider type) multiplication + * of 2 normalized numbers. + */ +static void +lp_build_mul_norm_expand(struct lp_build_context *bld, + LLVMValueRef a, LLVMValueRef b, + LLVMValueRef *resl, LLVMValueRef *resh, + boolean signedness_differs) +{ + const struct lp_type type = bld->type; + struct lp_type wide_type = lp_wider_type(type); + struct lp_type wide_type2 = wide_type; + struct lp_type type2 = type; + LLVMValueRef al, ah, bl, bh; + + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + assert(!type.floating && !type.fixed && type.norm); + + if(a == bld->zero || b == bld->zero) { + LLVMValueRef zero = LLVMConstNull(lp_build_vec_type(bld->gallivm, wide_type)); + *resl = zero; + *resh = zero; + return; + } + + if (signedness_differs) { + type2.sign = !type.sign; + wide_type2.sign = !wide_type2.sign; + } + + lp_build_unpack2_native(bld->gallivm, type, wide_type, a, , ); + lp_build_unpack2_native(bld->gallivm, type2, wide_type2, b, , ); + + *resl = lp_build_mul_norm(bld->gallivm, wide_type, al, bl); + *resh = lp_build_mul_norm(bld->gallivm, wide_type, ah, bh); +} + + +/** * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml */ LLVMValueRef @@ -192,9 +243,72 @@ lp_build_blend(struct lp_build_context *bld, if (optimise_only) return NULL; - src_term = lp_build_mul(bld, src, src_factor); - dst_term = lp_build_mul(bld, dst, dst_factor); -