Updated series looks good to me.

Jose
----- Original Message ----- > From: Roland Scheidegger <[email protected]> > > Not used since ages, and it wouldn't work at all with explicit derivatives > now > (not that it did before as it ignored them but now the code would just use > the derivs pre-projected which would be quite random numbers). > > v2: also get rid of 3 helper functions no longer used. > --- > src/gallium/auxiliary/gallivm/lp_bld_sample.c | 761 > ++++++++++--------------- > 1 file changed, 286 insertions(+), 475 deletions(-) > > diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c > b/src/gallium/auxiliary/gallivm/lp_bld_sample.c > index 39c3a2f..1c35200 100644 > --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c > +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c > @@ -1414,72 +1414,6 @@ lp_build_cube_imapos(struct lp_build_context > *coord_bld, LLVMValueRef coord) > return ima; > } > > -/** Helper used by lp_build_cube_lookup() */ > -static LLVMValueRef > -lp_build_cube_imaneg(struct lp_build_context *coord_bld, LLVMValueRef coord) > -{ > - /* ima = -0.5 / abs(coord); */ > - LLVMValueRef negHalf = lp_build_const_vec(coord_bld->gallivm, > coord_bld->type, -0.5); > - LLVMValueRef absCoord = lp_build_abs(coord_bld, coord); > - LLVMValueRef ima = lp_build_div(coord_bld, negHalf, absCoord); > - return ima; > -} > - > -/** > - * Helper used by lp_build_cube_lookup() > - * FIXME: the sign here can also be 0. > - * Arithmetically this could definitely make a difference. Either > - * fix the comment or use other (simpler) sign function, not sure > - * which one it should be. 
> - * \param sign scalar +1 or -1 > - * \param coord float vector > - * \param ima float vector > - */ > -static LLVMValueRef > -lp_build_cube_coord(struct lp_build_context *coord_bld, > - LLVMValueRef sign, int negate_coord, > - LLVMValueRef coord, LLVMValueRef ima) > -{ > - /* return negate(coord) * ima * sign + 0.5; */ > - LLVMValueRef half = lp_build_const_vec(coord_bld->gallivm, > coord_bld->type, 0.5); > - LLVMValueRef res; > - > - assert(negate_coord == +1 || negate_coord == -1); > - > - if (negate_coord == -1) { > - coord = lp_build_negate(coord_bld, coord); > - } > - > - res = lp_build_mul(coord_bld, coord, ima); > - if (sign) { > - sign = lp_build_broadcast_scalar(coord_bld, sign); > - res = lp_build_mul(coord_bld, res, sign); > - } > - res = lp_build_add(coord_bld, res, half); > - > - return res; > -} > - > - > -/** Helper used by lp_build_cube_lookup() > - * Return (major_coord >= 0) ? pos_face : neg_face; > - */ > -static LLVMValueRef > -lp_build_cube_face(struct lp_build_sample_context *bld, > - LLVMValueRef major_coord, > - unsigned pos_face, unsigned neg_face) > -{ > - struct gallivm_state *gallivm = bld->gallivm; > - LLVMBuilderRef builder = gallivm->builder; > - LLVMValueRef cmp = LLVMBuildFCmp(builder, LLVMRealUGE, > - major_coord, > - bld->float_bld.zero, ""); > - LLVMValueRef pos = lp_build_const_int32(gallivm, pos_face); > - LLVMValueRef neg = lp_build_const_int32(gallivm, neg_face); > - LLVMValueRef res = LLVMBuildSelect(builder, cmp, pos, neg, ""); > - return res; > -} > - > > /** Helper for doing 3-wise selection. > * Returns sel1 ? val2 : (sel0 ? val0 : val1). > @@ -1497,6 +1431,7 @@ lp_build_select3(struct lp_build_context *sel_bld, > return lp_build_select(sel_bld, sel1, val2, tmp); > } > > + > /** > * Generate code to do cube face selection and compute per-face texcoords. 
> */ > @@ -1513,301 +1448,141 @@ lp_build_cube_lookup(struct lp_build_sample_context > *bld, > struct gallivm_state *gallivm = bld->gallivm; > LLVMValueRef si, ti, ri; > > - if (1 || coord_bld->type.length > 4) { > - /* > - * Do per-pixel face selection. We cannot however (as we used to do) > - * simply calculate the derivs afterwards (which is very bogus for > - * explicit derivs btw) because the values would be "random" when > - * not all pixels lie on the same face. So what we do here is just > - * calculate the derivatives after scaling the coords by the absolute > - * value of the inverse major axis, and essentially do rho calculation > - * steps as if it were a 3d texture. This is perfect if all pixels hit > - * the same face, but not so great at edges, I believe the max error > - * should be sqrt(2) with no_rho_approx or 2 otherwise (essentially > measuring > - * the 3d distance between 2 points on the cube instead of measuring > up/down > - * the edge). Still this is possibly a win over just selecting the > same face > - * for all pixels. Unfortunately, something like that doesn't work for > - * explicit derivatives. 
> - */ > - struct lp_build_context *cint_bld = &bld->int_coord_bld; > - struct lp_type intctype = cint_bld->type; > - LLVMTypeRef coord_vec_type = coord_bld->vec_type; > - LLVMTypeRef cint_vec_type = cint_bld->vec_type; > - LLVMValueRef as, at, ar, face, face_s, face_t; > - LLVMValueRef as_ge_at, maxasat, ar_ge_as_at; > - LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz; > - LLVMValueRef tnegi, rnegi; > - LLVMValueRef ma, mai, signma, signmabit, imahalfpos; > - LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, > 0.5); > - LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype, > - 1 << (intctype.width - > 1)); > - LLVMValueRef signshift = lp_build_const_int_vec(gallivm, intctype, > - intctype.width -1); > - LLVMValueRef facex = lp_build_const_int_vec(gallivm, intctype, > PIPE_TEX_FACE_POS_X); > - LLVMValueRef facey = lp_build_const_int_vec(gallivm, intctype, > PIPE_TEX_FACE_POS_Y); > - LLVMValueRef facez = lp_build_const_int_vec(gallivm, intctype, > PIPE_TEX_FACE_POS_Z); > - LLVMValueRef s = coords[0]; > - LLVMValueRef t = coords[1]; > - LLVMValueRef r = coords[2]; > - > - assert(PIPE_TEX_FACE_NEG_X == PIPE_TEX_FACE_POS_X + 1); > - assert(PIPE_TEX_FACE_NEG_Y == PIPE_TEX_FACE_POS_Y + 1); > - assert(PIPE_TEX_FACE_NEG_Z == PIPE_TEX_FACE_POS_Z + 1); > + /* > + * Do per-pixel face selection. We cannot however (as we used to do) > + * simply calculate the derivs afterwards (which is very bogus for > + * explicit derivs btw) because the values would be "random" when > + * not all pixels lie on the same face. So what we do here is just > + * calculate the derivatives after scaling the coords by the absolute > + * value of the inverse major axis, and essentially do rho calculation > + * steps as if it were a 3d texture. 
This is perfect if all pixels hit > + * the same face, but not so great at edges, I believe the max error > + * should be sqrt(2) with no_rho_approx or 2 otherwise (essentially > measuring > + * the 3d distance between 2 points on the cube instead of measuring > up/down > + * the edge). Still this is possibly a win over just selecting the same > face > + * for all pixels. Unfortunately, something like that doesn't work for > + * explicit derivatives. > + */ > + struct lp_build_context *cint_bld = &bld->int_coord_bld; > + struct lp_type intctype = cint_bld->type; > + LLVMTypeRef coord_vec_type = coord_bld->vec_type; > + LLVMTypeRef cint_vec_type = cint_bld->vec_type; > + LLVMValueRef as, at, ar, face, face_s, face_t; > + LLVMValueRef as_ge_at, maxasat, ar_ge_as_at; > + LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz; > + LLVMValueRef tnegi, rnegi; > + LLVMValueRef ma, mai, signma, signmabit, imahalfpos; > + LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5); > + LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype, > + 1 << (intctype.width - > 1)); > + LLVMValueRef signshift = lp_build_const_int_vec(gallivm, intctype, > + intctype.width -1); > + LLVMValueRef facex = lp_build_const_int_vec(gallivm, intctype, > PIPE_TEX_FACE_POS_X); > + LLVMValueRef facey = lp_build_const_int_vec(gallivm, intctype, > PIPE_TEX_FACE_POS_Y); > + LLVMValueRef facez = lp_build_const_int_vec(gallivm, intctype, > PIPE_TEX_FACE_POS_Z); > + LLVMValueRef s = coords[0]; > + LLVMValueRef t = coords[1]; > + LLVMValueRef r = coords[2]; > + > + assert(PIPE_TEX_FACE_NEG_X == PIPE_TEX_FACE_POS_X + 1); > + assert(PIPE_TEX_FACE_NEG_Y == PIPE_TEX_FACE_POS_Y + 1); > + assert(PIPE_TEX_FACE_NEG_Z == PIPE_TEX_FACE_POS_Z + 1); > + > + /* > + * get absolute value (for x/y/z face selection) and sign bit > + * (for mirroring minor coords and pos/neg face selection) > + * of the original coords. 
> + */ > + as = lp_build_abs(&bld->coord_bld, s); > + at = lp_build_abs(&bld->coord_bld, t); > + ar = lp_build_abs(&bld->coord_bld, r); > > + /* > + * major face determination: select x if x > y else select y > + * select z if z >= max(x,y) else select previous result > + * if some axis are the same we chose z over y, y over x - the > + * dx10 spec seems to ask for it while OpenGL doesn't care (if we > + * wouldn't care could save a select or two if using different > + * compares and doing at_g_as_ar last since tnewx and tnewz are the > + * same). > + */ > + as_ge_at = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, as, at); > + maxasat = lp_build_max(coord_bld, as, at); > + ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat); > + > + if (need_derivs && (derivs_in || > + ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) && > + (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX)))) { > /* > - * get absolute value (for x/y/z face selection) and sign bit > - * (for mirroring minor coords and pos/neg face selection) > - * of the original coords. > + * XXX: This is really really complex. > + * It is a bit overkill to use this for implicit derivatives as well, > + * no way this is worth the cost in practice, but seems to be the > + * only way for getting accurate and per-pixel lod values. 
> */ > - as = lp_build_abs(&bld->coord_bld, s); > - at = lp_build_abs(&bld->coord_bld, t); > - ar = lp_build_abs(&bld->coord_bld, r); > - > + LLVMValueRef ima, imahalf, tmp, ddx[3], ddy[3]; > + LLVMValueRef madx, mady, madxdivma, madydivma; > + LLVMValueRef sdxi, tdxi, rdxi, sdyi, tdyi, rdyi; > + LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi; > + LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz; > + LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz; > + LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy; > /* > - * major face determination: select x if x > y else select y > - * select z if z >= max(x,y) else select previous result > - * if some axis are the same we chose z over y, y over x - the > - * dx10 spec seems to ask for it while OpenGL doesn't care (if we > - * wouldn't care could save a select or two if using different > - * compares and doing at_g_as_ar last since tnewx and tnewz are the > - * same). > + * s = 1/2 * ( sc / ma + 1) > + * t = 1/2 * ( tc / ma + 1) > + * > + * s' = 1/2 * (sc' * ma - sc * ma') / ma^2 > + * t' = 1/2 * (tc' * ma - tc * ma') / ma^2 > + * > + * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma > + * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma > + * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma > + * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma > */ > - as_ge_at = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, as, at); > - maxasat = lp_build_max(coord_bld, as, at); > - ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat); > - > - if (need_derivs && (derivs_in || > - ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) && > - (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX)))) { > - /* > - * XXX: This is really really complex. > - * It is a bit overkill to use this for implicit derivatives as > well, > - * no way this is worth the cost in practice, but seems to be the > - * only way for getting accurate and per-pixel lod values. 
> - */ > - LLVMValueRef ima, imahalf, tmp, ddx[3], ddy[3]; > - LLVMValueRef madx, mady, madxdivma, madydivma; > - LLVMValueRef sdxi, tdxi, rdxi, sdyi, tdyi, rdyi; > - LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi; > - LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz; > - LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz; > - LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy; > - /* > - * s = 1/2 * ( sc / ma + 1) > - * t = 1/2 * ( tc / ma + 1) > - * > - * s' = 1/2 * (sc' * ma - sc * ma') / ma^2 > - * t' = 1/2 * (tc' * ma - tc * ma') / ma^2 > - * > - * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma > - * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma > - * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma > - * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma > - */ > - > - /* select ma, calculate ima */ > - ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r); > - mai = LLVMBuildBitCast(builder, ma, cint_vec_type, ""); > - signmabit = LLVMBuildAnd(builder, mai, signmask, ""); > - ima = lp_build_div(coord_bld, coord_bld->one, ma); > - imahalf = lp_build_mul(coord_bld, posHalf, ima); > - imahalfpos = lp_build_abs(coord_bld, imahalf); > - > - if (!derivs_in) { > - ddx[0] = lp_build_ddx(coord_bld, s); > - ddx[1] = lp_build_ddx(coord_bld, t); > - ddx[2] = lp_build_ddx(coord_bld, r); > - ddy[0] = lp_build_ddy(coord_bld, s); > - ddy[1] = lp_build_ddy(coord_bld, t); > - ddy[2] = lp_build_ddy(coord_bld, r); > - } > - else { > - ddx[0] = derivs_in->ddx[0]; > - ddx[1] = derivs_in->ddx[1]; > - ddx[2] = derivs_in->ddx[2]; > - ddy[0] = derivs_in->ddy[0]; > - ddy[1] = derivs_in->ddy[1]; > - ddy[2] = derivs_in->ddy[2]; > - } > - > - /* select major derivatives */ > - madx = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddx[0], > ddx[1], ddx[2]); > - mady = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddy[0], > ddy[1], ddy[2]); > - > - si = LLVMBuildBitCast(builder, s, cint_vec_type, ""); > - ti = LLVMBuildBitCast(builder, t, cint_vec_type, 
""); > - ri = LLVMBuildBitCast(builder, r, cint_vec_type, ""); > - > - sdxi = LLVMBuildBitCast(builder, ddx[0], cint_vec_type, ""); > - tdxi = LLVMBuildBitCast(builder, ddx[1], cint_vec_type, ""); > - rdxi = LLVMBuildBitCast(builder, ddx[2], cint_vec_type, ""); > - > - sdyi = LLVMBuildBitCast(builder, ddy[0], cint_vec_type, ""); > - tdyi = LLVMBuildBitCast(builder, ddy[1], cint_vec_type, ""); > - rdyi = LLVMBuildBitCast(builder, ddy[2], cint_vec_type, ""); > > - /* > - * compute all possible new s/t coords, which does the mirroring, > - * and do the same for derivs minor axes. > - * snewx = signma * -r; > - * tnewx = -t; > - * snewy = s; > - * tnewy = signma * r; > - * snewz = signma * s; > - * tnewz = -t; > - */ > - tnegi = LLVMBuildXor(builder, ti, signmask, ""); > - rnegi = LLVMBuildXor(builder, ri, signmask, ""); > - tdxnegi = LLVMBuildXor(builder, tdxi, signmask, ""); > - rdxnegi = LLVMBuildXor(builder, rdxi, signmask, ""); > - tdynegi = LLVMBuildXor(builder, tdyi, signmask, ""); > - rdynegi = LLVMBuildXor(builder, rdyi, signmask, ""); > - > - snewx = LLVMBuildXor(builder, signmabit, rnegi, ""); > - tnewx = tnegi; > - sdxnewx = LLVMBuildXor(builder, signmabit, rdxnegi, ""); > - tdxnewx = tdxnegi; > - sdynewx = LLVMBuildXor(builder, signmabit, rdynegi, ""); > - tdynewx = tdynegi; > - > - snewy = si; > - tnewy = LLVMBuildXor(builder, signmabit, ri, ""); > - sdxnewy = sdxi; > - tdxnewy = LLVMBuildXor(builder, signmabit, rdxi, ""); > - sdynewy = sdyi; > - tdynewy = LLVMBuildXor(builder, signmabit, rdyi, ""); > - > - snewz = LLVMBuildXor(builder, signmabit, si, ""); > - tnewz = tnegi; > - sdxnewz = LLVMBuildXor(builder, signmabit, sdxi, ""); > - tdxnewz = tdxnegi; > - sdynewz = LLVMBuildXor(builder, signmabit, sdyi, ""); > - tdynewz = tdynegi; > - > - /* select the mirrored values */ > - face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, > facey, facez); > - face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, > snewy, snewz); > - face_t = 
lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, > tnewy, tnewz); > - face_sdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, > sdxnewx, sdxnewy, sdxnewz); > - face_tdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, > tdxnewx, tdxnewy, tdxnewz); > - face_sdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, > sdynewx, sdynewy, sdynewz); > - face_tdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, > tdynewx, tdynewy, tdynewz); > - > - face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, ""); > - face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, ""); > - face_sdx = LLVMBuildBitCast(builder, face_sdx, coord_vec_type, ""); > - face_tdx = LLVMBuildBitCast(builder, face_tdx, coord_vec_type, ""); > - face_sdy = LLVMBuildBitCast(builder, face_sdy, coord_vec_type, ""); > - face_tdy = LLVMBuildBitCast(builder, face_tdy, coord_vec_type, ""); > - > - /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */ > - madxdivma = lp_build_mul(coord_bld, madx, ima); > - tmp = lp_build_mul(coord_bld, madxdivma, face_s); > - tmp = lp_build_sub(coord_bld, face_sdx, tmp); > - derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalf); > - > - /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */ > - tmp = lp_build_mul(coord_bld, madxdivma, face_t); > - tmp = lp_build_sub(coord_bld, face_tdx, tmp); > - derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalf); > - > - /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */ > - madydivma = lp_build_mul(coord_bld, mady, ima); > - tmp = lp_build_mul(coord_bld, madydivma, face_s); > - tmp = lp_build_sub(coord_bld, face_sdy, tmp); > - derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalf); > - > - /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */ > - tmp = lp_build_mul(coord_bld, madydivma, face_t); > - tmp = lp_build_sub(coord_bld, face_tdy, tmp); > - derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalf); > - > - signma = LLVMBuildLShr(builder, mai, signshift, ""); > - coords[2] = LLVMBuildOr(builder, 
face, signma, "face"); > - > - /* project coords */ > - face_s = lp_build_mul(coord_bld, face_s, imahalfpos); > - face_t = lp_build_mul(coord_bld, face_t, imahalfpos); > - > - coords[0] = lp_build_add(coord_bld, face_s, posHalf); > - coords[1] = lp_build_add(coord_bld, face_t, posHalf); > - > - return; > + /* select ma, calculate ima */ > + ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r); > + mai = LLVMBuildBitCast(builder, ma, cint_vec_type, ""); > + signmabit = LLVMBuildAnd(builder, mai, signmask, ""); > + ima = lp_build_div(coord_bld, coord_bld->one, ma); > + imahalf = lp_build_mul(coord_bld, posHalf, ima); > + imahalfpos = lp_build_abs(coord_bld, imahalf); > + > + if (!derivs_in) { > + ddx[0] = lp_build_ddx(coord_bld, s); > + ddx[1] = lp_build_ddx(coord_bld, t); > + ddx[2] = lp_build_ddx(coord_bld, r); > + ddy[0] = lp_build_ddy(coord_bld, s); > + ddy[1] = lp_build_ddy(coord_bld, t); > + ddy[2] = lp_build_ddy(coord_bld, r); > } > - > - else if (need_derivs) { > - LLVMValueRef ddx_ddy[2], tmp[3], rho_vec; > - static const unsigned char swizzle0[] = { /* no-op swizzle */ > - 0, LP_BLD_SWIZZLE_DONTCARE, > - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > - }; > - static const unsigned char swizzle1[] = { > - 1, LP_BLD_SWIZZLE_DONTCARE, > - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > - }; > - static const unsigned char swizzle01[] = { /* no-op swizzle */ > - 0, 1, > - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > - }; > - static const unsigned char swizzle23[] = { > - 2, 3, > - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > - }; > - static const unsigned char swizzle02[] = { > - 0, 2, > - LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > - }; > - > - /* > - * scale the s/t/r coords pre-select/mirror so we can calculate > - * "reasonable" derivs. 
> - */ > - ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r); > - imahalfpos = lp_build_cube_imapos(coord_bld, ma); > - s = lp_build_mul(coord_bld, s, imahalfpos); > - t = lp_build_mul(coord_bld, t, imahalfpos); > - r = lp_build_mul(coord_bld, r, imahalfpos); > - > - /* > - * This isn't quite the same as the "ordinary" (3d deriv) path > since we > - * know the texture is square which simplifies things (we can omit > the > - * size mul which happens very early completely here and do it at > the > - * very end). > - * Also always do calculations according to > GALLIVM_DEBUG_NO_RHO_APPROX > - * since the error can get quite big otherwise at edges. > - * (With no_rho_approx max error is sqrt(2) at edges, same as it is > - * without no_rho_approx for 2d textures, otherwise it would be > factor 2.) > - */ > - ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t); > - ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r); > - > - ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]); > - ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]); > - > - tmp[0] = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01); > - tmp[1] = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle23); > - tmp[2] = lp_build_swizzle_aos(coord_bld, ddx_ddy[1], swizzle02); > - > - rho_vec = lp_build_add(coord_bld, tmp[0], tmp[1]); > - rho_vec = lp_build_add(coord_bld, rho_vec, tmp[2]); > - > - tmp[0] = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0); > - tmp[1] = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1); > - *rho = lp_build_max(coord_bld, tmp[0], tmp[1]); > + else { > + ddx[0] = derivs_in->ddx[0]; > + ddx[1] = derivs_in->ddx[1]; > + ddx[2] = derivs_in->ddx[2]; > + ddy[0] = derivs_in->ddy[0]; > + ddy[1] = derivs_in->ddy[1]; > + ddy[2] = derivs_in->ddy[2]; > } > > - if (!need_derivs) { > - ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r); > - } > - mai = LLVMBuildBitCast(builder, ma, cint_vec_type, ""); > - signmabit = 
LLVMBuildAnd(builder, mai, signmask, ""); > + /* select major derivatives */ > + madx = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddx[0], > ddx[1], ddx[2]); > + mady = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddy[0], > ddy[1], ddy[2]); > > si = LLVMBuildBitCast(builder, s, cint_vec_type, ""); > ti = LLVMBuildBitCast(builder, t, cint_vec_type, ""); > ri = LLVMBuildBitCast(builder, r, cint_vec_type, ""); > > + sdxi = LLVMBuildBitCast(builder, ddx[0], cint_vec_type, ""); > + tdxi = LLVMBuildBitCast(builder, ddx[1], cint_vec_type, ""); > + rdxi = LLVMBuildBitCast(builder, ddx[2], cint_vec_type, ""); > + > + sdyi = LLVMBuildBitCast(builder, ddy[0], cint_vec_type, ""); > + tdyi = LLVMBuildBitCast(builder, ddy[1], cint_vec_type, ""); > + rdyi = LLVMBuildBitCast(builder, ddy[2], cint_vec_type, ""); > + > /* > - * compute all possible new s/t coords, which does the mirroring > + * compute all possible new s/t coords, which does the mirroring, > + * and do the same for derivs minor axes. 
> * snewx = signma * -r; > * tnewx = -t; > * snewy = s; > @@ -1817,164 +1592,200 @@ lp_build_cube_lookup(struct lp_build_sample_context > *bld, > */ > tnegi = LLVMBuildXor(builder, ti, signmask, ""); > rnegi = LLVMBuildXor(builder, ri, signmask, ""); > + tdxnegi = LLVMBuildXor(builder, tdxi, signmask, ""); > + rdxnegi = LLVMBuildXor(builder, rdxi, signmask, ""); > + tdynegi = LLVMBuildXor(builder, tdyi, signmask, ""); > + rdynegi = LLVMBuildXor(builder, rdyi, signmask, ""); > > snewx = LLVMBuildXor(builder, signmabit, rnegi, ""); > tnewx = tnegi; > + sdxnewx = LLVMBuildXor(builder, signmabit, rdxnegi, ""); > + tdxnewx = tdxnegi; > + sdynewx = LLVMBuildXor(builder, signmabit, rdynegi, ""); > + tdynewx = tdynegi; > > snewy = si; > tnewy = LLVMBuildXor(builder, signmabit, ri, ""); > + sdxnewy = sdxi; > + tdxnewy = LLVMBuildXor(builder, signmabit, rdxi, ""); > + sdynewy = sdyi; > + tdynewy = LLVMBuildXor(builder, signmabit, rdyi, ""); > > snewz = LLVMBuildXor(builder, signmabit, si, ""); > tnewz = tnegi; > + sdxnewz = LLVMBuildXor(builder, signmabit, sdxi, ""); > + tdxnewz = tdxnegi; > + sdynewz = LLVMBuildXor(builder, signmabit, sdyi, ""); > + tdynewz = tdynegi; > > /* select the mirrored values */ > + face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, > facez); > face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, > snewy, snewz); > face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, > tnewy, tnewz); > - face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, > facez); > + face_sdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdxnewx, > sdxnewy, sdxnewz); > + face_tdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdxnewx, > tdxnewy, tdxnewz); > + face_sdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdynewx, > sdynewy, sdynewz); > + face_tdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdynewx, > tdynewy, tdynewz); > > face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, 
""); > face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, ""); > + face_sdx = LLVMBuildBitCast(builder, face_sdx, coord_vec_type, ""); > + face_tdx = LLVMBuildBitCast(builder, face_tdx, coord_vec_type, ""); > + face_sdy = LLVMBuildBitCast(builder, face_sdy, coord_vec_type, ""); > + face_tdy = LLVMBuildBitCast(builder, face_tdy, coord_vec_type, ""); > + > + /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */ > + madxdivma = lp_build_mul(coord_bld, madx, ima); > + tmp = lp_build_mul(coord_bld, madxdivma, face_s); > + tmp = lp_build_sub(coord_bld, face_sdx, tmp); > + derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalf); > + > + /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */ > + tmp = lp_build_mul(coord_bld, madxdivma, face_t); > + tmp = lp_build_sub(coord_bld, face_tdx, tmp); > + derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalf); > + > + /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */ > + madydivma = lp_build_mul(coord_bld, mady, ima); > + tmp = lp_build_mul(coord_bld, madydivma, face_s); > + tmp = lp_build_sub(coord_bld, face_sdy, tmp); > + derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalf); > + > + /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */ > + tmp = lp_build_mul(coord_bld, madydivma, face_t); > + tmp = lp_build_sub(coord_bld, face_tdy, tmp); > + derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalf); > > - /* add +1 for neg face */ > - /* XXX with AVX probably want to use another select here - > - * as long as we ensure vblendvps gets used we can actually > - * skip the comparison and just use sign as a "mask" directly. 
> - */ > signma = LLVMBuildLShr(builder, mai, signshift, ""); > coords[2] = LLVMBuildOr(builder, face, signma, "face"); > > /* project coords */ > - if (!need_derivs) { > - imahalfpos = lp_build_cube_imapos(coord_bld, ma); > - face_s = lp_build_mul(coord_bld, face_s, imahalfpos); > - face_t = lp_build_mul(coord_bld, face_t, imahalfpos); > - } > + face_s = lp_build_mul(coord_bld, face_s, imahalfpos); > + face_t = lp_build_mul(coord_bld, face_t, imahalfpos); > > coords[0] = lp_build_add(coord_bld, face_s, posHalf); > coords[1] = lp_build_add(coord_bld, face_t, posHalf); > + > + return; > } > > - else { > - struct lp_build_if_state if_ctx; > - LLVMValueRef face_s_var; > - LLVMValueRef face_t_var; > - LLVMValueRef face_var; > - LLVMValueRef arx_ge_ary_arz, ary_ge_arx_arz; > - LLVMValueRef shuffles[4]; > - LLVMValueRef arxy_ge_aryx, arxy_ge_arzz, arxy_ge_arxy_arzz; > - LLVMValueRef arxyxy, aryxzz, arxyxy_ge_aryxzz; > - LLVMValueRef tmp[4], rxyz, arxyz; > - struct lp_build_context *float_bld = &bld->float_bld; > - LLVMValueRef s, t, r, face, face_s, face_t; > - > - assert(bld->coord_bld.type.length == 4); > - > - tmp[0] = s = coords[0]; > - tmp[1] = t = coords[1]; > - tmp[2] = r = coords[2]; > - rxyz = lp_build_hadd_partial4(&bld->coord_bld, tmp, 3); > - arxyz = lp_build_abs(&bld->coord_bld, rxyz); > - > - shuffles[0] = lp_build_const_int32(gallivm, 0); > - shuffles[1] = lp_build_const_int32(gallivm, 1); > - shuffles[2] = lp_build_const_int32(gallivm, 0); > - shuffles[3] = lp_build_const_int32(gallivm, 1); > - arxyxy = LLVMBuildShuffleVector(builder, arxyz, arxyz, > LLVMConstVector(shuffles, 4), ""); > - shuffles[0] = lp_build_const_int32(gallivm, 1); > - shuffles[1] = lp_build_const_int32(gallivm, 0); > - shuffles[2] = lp_build_const_int32(gallivm, 2); > - shuffles[3] = lp_build_const_int32(gallivm, 2); > - aryxzz = LLVMBuildShuffleVector(builder, arxyz, arxyz, > LLVMConstVector(shuffles, 4), ""); > - arxyxy_ge_aryxzz = lp_build_cmp(&bld->coord_bld, PIPE_FUNC_GEQUAL, > 
arxyxy, aryxzz); > - > - shuffles[0] = lp_build_const_int32(gallivm, 0); > - shuffles[1] = lp_build_const_int32(gallivm, 1); > - arxy_ge_aryx = LLVMBuildShuffleVector(builder, arxyxy_ge_aryxzz, > arxyxy_ge_aryxzz, > - LLVMConstVector(shuffles, 2), > ""); > - shuffles[0] = lp_build_const_int32(gallivm, 2); > - shuffles[1] = lp_build_const_int32(gallivm, 3); > - arxy_ge_arzz = LLVMBuildShuffleVector(builder, arxyxy_ge_aryxzz, > arxyxy_ge_aryxzz, > - LLVMConstVector(shuffles, 2), > ""); > - arxy_ge_arxy_arzz = LLVMBuildAnd(builder, arxy_ge_aryx, arxy_ge_arzz, > ""); > - > - arx_ge_ary_arz = LLVMBuildExtractElement(builder, arxy_ge_arxy_arzz, > - lp_build_const_int32(gallivm, > 0), ""); > - arx_ge_ary_arz = LLVMBuildICmp(builder, LLVMIntNE, arx_ge_ary_arz, > - lp_build_const_int32(gallivm, > 0), ""); > - ary_ge_arx_arz = LLVMBuildExtractElement(builder, arxy_ge_arxy_arzz, > - lp_build_const_int32(gallivm, > 1), ""); > - ary_ge_arx_arz = LLVMBuildICmp(builder, LLVMIntNE, ary_ge_arx_arz, > - lp_build_const_int32(gallivm, > 0), ""); > - face_s_var = lp_build_alloca(gallivm, bld->coord_bld.vec_type, > "face_s_var"); > - face_t_var = lp_build_alloca(gallivm, bld->coord_bld.vec_type, > "face_t_var"); > - face_var = lp_build_alloca(gallivm, bld->int_bld.vec_type, > "face_var"); > - > - lp_build_if(&if_ctx, gallivm, arx_ge_ary_arz); > - { > - /* +/- X face */ > - LLVMValueRef sign, ima; > - si = LLVMBuildExtractElement(builder, rxyz, > - lp_build_const_int32(gallivm, 0), ""); > - /* +/- X face */ > - sign = lp_build_sgn(float_bld, si); > - ima = lp_build_cube_imaneg(coord_bld, s); > - face_s = lp_build_cube_coord(coord_bld, sign, +1, r, ima); > - face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); > - face = lp_build_cube_face(bld, si, > - PIPE_TEX_FACE_POS_X, > - PIPE_TEX_FACE_NEG_X); > - LLVMBuildStore(builder, face_s, face_s_var); > - LLVMBuildStore(builder, face_t, face_t_var); > - LLVMBuildStore(builder, face, face_var); > - } > - lp_build_else(&if_ctx); > - { > - 
struct lp_build_if_state if_ctx2; > - > - lp_build_if(&if_ctx2, gallivm, ary_ge_arx_arz); > - { > - LLVMValueRef sign, ima; > - /* +/- Y face */ > - ti = LLVMBuildExtractElement(builder, rxyz, > - lp_build_const_int32(gallivm, 1), > ""); > - sign = lp_build_sgn(float_bld, ti); > - ima = lp_build_cube_imaneg(coord_bld, t); > - face_s = lp_build_cube_coord(coord_bld, NULL, -1, s, ima); > - face_t = lp_build_cube_coord(coord_bld, sign, -1, r, ima); > - face = lp_build_cube_face(bld, ti, > - PIPE_TEX_FACE_POS_Y, > - PIPE_TEX_FACE_NEG_Y); > - LLVMBuildStore(builder, face_s, face_s_var); > - LLVMBuildStore(builder, face_t, face_t_var); > - LLVMBuildStore(builder, face, face_var); > - } > - lp_build_else(&if_ctx2); > - { > - /* +/- Z face */ > - LLVMValueRef sign, ima; > - ri = LLVMBuildExtractElement(builder, rxyz, > - lp_build_const_int32(gallivm, 2), > ""); > - sign = lp_build_sgn(float_bld, ri); > - ima = lp_build_cube_imaneg(coord_bld, r); > - face_s = lp_build_cube_coord(coord_bld, sign, -1, s, ima); > - face_t = lp_build_cube_coord(coord_bld, NULL, +1, t, ima); > - face = lp_build_cube_face(bld, ri, > - PIPE_TEX_FACE_POS_Z, > - PIPE_TEX_FACE_NEG_Z); > - LLVMBuildStore(builder, face_s, face_s_var); > - LLVMBuildStore(builder, face_t, face_t_var); > - LLVMBuildStore(builder, face, face_var); > - } > - lp_build_endif(&if_ctx2); > - } > + else if (need_derivs) { > + LLVMValueRef ddx_ddy[2], tmp[3], rho_vec; > + static const unsigned char swizzle0[] = { /* no-op swizzle */ > + 0, LP_BLD_SWIZZLE_DONTCARE, > + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > + }; > + static const unsigned char swizzle1[] = { > + 1, LP_BLD_SWIZZLE_DONTCARE, > + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > + }; > + static const unsigned char swizzle01[] = { /* no-op swizzle */ > + 0, 1, > + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > + }; > + static const unsigned char swizzle23[] = { > + 2, 3, > + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > + }; > + static const 
unsigned char swizzle02[] = { > + 0, 2, > + LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE > + }; > + > + /* > + * scale the s/t/r coords pre-select/mirror so we can calculate > + * "reasonable" derivs. > + */ > + ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r); > + imahalfpos = lp_build_cube_imapos(coord_bld, ma); > + s = lp_build_mul(coord_bld, s, imahalfpos); > + t = lp_build_mul(coord_bld, t, imahalfpos); > + r = lp_build_mul(coord_bld, r, imahalfpos); > + > + /* > + * This isn't quite the same as the "ordinary" (3d deriv) path since > we > + * know the texture is square which simplifies things (we can omit the > + * size mul which happens very early completely here and do it at the > + * very end). > + * Also always do calculations according to > GALLIVM_DEBUG_NO_RHO_APPROX > + * since the error can get quite big otherwise at edges. > + * (With no_rho_approx max error is sqrt(2) at edges, same as it is > + * without no_rho_approx for 2d textures, otherwise it would be factor > 2.) 
> + */ > + ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t); > + ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r); > > - lp_build_endif(&if_ctx); > + ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]); > + ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]); > > - coords[0] = LLVMBuildLoad(builder, face_s_var, "face_s"); > - coords[1] = LLVMBuildLoad(builder, face_t_var, "face_t"); > - face = LLVMBuildLoad(builder, face_var, "face"); > - coords[2] = lp_build_broadcast_scalar(&bld->int_coord_bld, face); > + tmp[0] = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01); > + tmp[1] = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle23); > + tmp[2] = lp_build_swizzle_aos(coord_bld, ddx_ddy[1], swizzle02); > + > + rho_vec = lp_build_add(coord_bld, tmp[0], tmp[1]); > + rho_vec = lp_build_add(coord_bld, rho_vec, tmp[2]); > + > + tmp[0] = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0); > + tmp[1] = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1); > + *rho = lp_build_max(coord_bld, tmp[0], tmp[1]); > } > + > + if (!need_derivs) { > + ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r); > + } > + mai = LLVMBuildBitCast(builder, ma, cint_vec_type, ""); > + signmabit = LLVMBuildAnd(builder, mai, signmask, ""); > + > + si = LLVMBuildBitCast(builder, s, cint_vec_type, ""); > + ti = LLVMBuildBitCast(builder, t, cint_vec_type, ""); > + ri = LLVMBuildBitCast(builder, r, cint_vec_type, ""); > + > + /* > + * compute all possible new s/t coords, which does the mirroring > + * snewx = signma * -r; > + * tnewx = -t; > + * snewy = s; > + * tnewy = signma * r; > + * snewz = signma * s; > + * tnewz = -t; > + */ > + tnegi = LLVMBuildXor(builder, ti, signmask, ""); > + rnegi = LLVMBuildXor(builder, ri, signmask, ""); > + > + snewx = LLVMBuildXor(builder, signmabit, rnegi, ""); > + tnewx = tnegi; > + > + snewy = si; > + tnewy = LLVMBuildXor(builder, signmabit, ri, ""); > + > + snewz = LLVMBuildXor(builder, signmabit, si, ""); 
> + tnewz = tnegi; > + > + /* select the mirrored values */ > + face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, > snewz); > + face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, > tnewz); > + face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, > facez); > + > + face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, ""); > + face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, ""); > + > + /* add +1 for neg face */ > + /* XXX with AVX probably want to use another select here - > + * as long as we ensure vblendvps gets used we can actually > + * skip the comparison and just use sign as a "mask" directly. > + */ > + signma = LLVMBuildLShr(builder, mai, signshift, ""); > + coords[2] = LLVMBuildOr(builder, face, signma, "face"); > + > + /* project coords */ > + if (!need_derivs) { > + imahalfpos = lp_build_cube_imapos(coord_bld, ma); > + face_s = lp_build_mul(coord_bld, face_s, imahalfpos); > + face_t = lp_build_mul(coord_bld, face_t, imahalfpos); > + } > + > + coords[0] = lp_build_add(coord_bld, face_s, posHalf); > + coords[1] = lp_build_add(coord_bld, face_t, posHalf); > } > > > -- > 1.7.9.5 > _______________________________________________ mesa-dev mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/mesa-dev
