On 01/25/2013 11:59 AM, Chad Versace wrote: > On 01/24/2013 07:47 PM, Matt Turner wrote: >> Lower them to arithmetic and bit manipulation expressions. >> --- >> src/glsl/ir_optimization.h | 6 + >> src/glsl/lower_packing_builtins.cpp | 279 >> +++++++++++++++++++++++++++++++++++ >> 2 files changed, 285 insertions(+), 0 deletions(-) >> >> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h >> index ac90b87..8f33018 100644 >> --- a/src/glsl/ir_optimization.h >> +++ b/src/glsl/ir_optimization.h >> @@ -54,6 +54,12 @@ enum lower_packing_builtins_op { >> >> LOWER_PACK_HALF_2x16_TO_SPLIT = 0x0040, >> LOWER_UNPACK_HALF_2x16_TO_SPLIT = 0x0080, >> + >> + LOWER_PACK_SNORM_4x8 = 0x0100, >> + LOWER_UNPACK_SNORM_4x8 = 0x0200, >> + >> + LOWER_PACK_UNORM_4x8 = 0x0400, >> + LOWER_UNPACK_UNORM_4x8 = 0x0800, >> }; >> >> bool do_common_optimization(exec_list *ir, bool linked, >> diff --git a/src/glsl/lower_packing_builtins.cpp >> b/src/glsl/lower_packing_builtins.cpp >> index 49176cc..aa6765f 100644 >> --- a/src/glsl/lower_packing_builtins.cpp >> +++ b/src/glsl/lower_packing_builtins.cpp >> @@ -85,9 +85,15 @@ public: >> case LOWER_PACK_SNORM_2x16: >> *rvalue = lower_pack_snorm_2x16(op0); >> break; >> + case LOWER_PACK_SNORM_4x8: >> + *rvalue = lower_pack_snorm_4x8(op0); >> + break; >> case LOWER_PACK_UNORM_2x16: >> *rvalue = lower_pack_unorm_2x16(op0); >> break; >> + case LOWER_PACK_UNORM_4x8: >> + *rvalue = lower_pack_unorm_4x8(op0); >> + break; >> case LOWER_PACK_HALF_2x16: >> *rvalue = lower_pack_half_2x16(op0); >> break; >> @@ -97,9 +103,15 @@ public: >> case LOWER_UNPACK_SNORM_2x16: >> *rvalue = lower_unpack_snorm_2x16(op0); >> break; >> + case LOWER_UNPACK_SNORM_4x8: >> + *rvalue = lower_unpack_snorm_4x8(op0); >> + break; >> case LOWER_UNPACK_UNORM_2x16: >> *rvalue = lower_unpack_unorm_2x16(op0); >> break; >> + case LOWER_UNPACK_UNORM_4x8: >> + *rvalue = lower_unpack_unorm_4x8(op0); >> + break; >> case LOWER_UNPACK_HALF_2x16: >> *rvalue = lower_unpack_half_2x16(op0); >> break; >> @@ -137,18 +149,30 @@ private: >> case ir_unop_pack_snorm_2x16: >> result = op_mask & LOWER_PACK_SNORM_2x16; >> break; >> + case ir_unop_pack_snorm_4x8: >> + result = op_mask & LOWER_PACK_SNORM_4x8; >> + break; >> case ir_unop_pack_unorm_2x16: >> result = op_mask & LOWER_PACK_UNORM_2x16; >> break; >> + case ir_unop_pack_unorm_4x8: >> + result = op_mask & LOWER_PACK_UNORM_4x8; >> + break; >> case ir_unop_pack_half_2x16: >> result = op_mask & (LOWER_PACK_HALF_2x16 | >> LOWER_PACK_HALF_2x16_TO_SPLIT); >> break; >> case ir_unop_unpack_snorm_2x16: >> result = op_mask & LOWER_UNPACK_SNORM_2x16; >> break; >> + case ir_unop_unpack_snorm_4x8: >> + result = op_mask & LOWER_UNPACK_SNORM_4x8; >> + break; >> case ir_unop_unpack_unorm_2x16: >> result = op_mask & LOWER_UNPACK_UNORM_2x16; >> break; >> + case ir_unop_unpack_unorm_4x8: >> + result = op_mask & LOWER_UNPACK_UNORM_4x8; >> + break; >> case ir_unop_unpack_half_2x16: >> result = op_mask & (LOWER_UNPACK_HALF_2x16 | >> LOWER_UNPACK_HALF_2x16_TO_SPLIT); >> break; >> @@ -214,6 +238,30 @@ private: >> } >> >> /** >> + * \brief Pack four uint8's into a single uint32. >> + * >> + * Interpret the given uvec4 as a uint32 quad. Pack the quad into a >> uint32 >> + * where the least significant bits specify the first element of the >> quad. >> + * Return the uint32. >> + */ > > I find the term "uint32 quad" confusing. It is too reminiscient of "quadword". > This not-so-bright reviewer thought: "A uint32 quadword? Huh? Oh! That means > a uint32 4-tuple". So, I'd like to see the phrase changed to "uint32 4-tuple" > or something similar, but this suggestion doesn't block the patch. > >> + ir_rvalue* >> + pack_uvec4_to_uint(ir_rvalue *uvec4_rval) >> + { >> + assert(uvec4_rval->type == glsl_type::uvec4_type); >> + >> + /* uvec4 u = UVEC4_RVAL; */ >> + ir_variable *u = factory.make_temp(glsl_type::uvec4_type, >> + "tmp_pack_uvec4_to_uint"); >> + factory.emit(assign(u, uvec4_rval)); >> + >> + /* return ((u.w 0xff) << 24) | ((u.z & 0xff) << 16) | ((u.y & 0xff) >> << 8) | (u.x & 0xff); */ > ^^^ missing & >> + return bit_or(bit_or(lshift(bit_and(swizzle_w(u), constant(0xffu)), >> constant(24u)), >> + lshift(bit_and(swizzle_z(u), constant(0xffu)), >> constant(16u))), >> + bit_or(lshift(bit_and(swizzle_y(u), constant(0xffu)), >> constant(8u)), >> + bit_and(swizzle_x(u), constant(0xffu)))); > > The four bit-and instructions should be factored out to a single instruction. > Paul already covered the best way to do that. > > > >> /** >> + * \brief Unpack a uint32 into four uint8's. >> + * >> + * Interpret the given uint32 as a uint8 quad where the uint32's least >> + * significant bits specify the quad's first element. Return the uint8 >> + * quad as a uvec4. >> + */ > > Same complaint about "uint8 quad". > > >> + ir_rvalue* >> + unpack_uint_to_uvec4(ir_rvalue *uint_rval) >> + { >> + assert(uint_rval->type == glsl_type::uint_type); >> + >> + /* uint u = UINT_RVAL; */ >> + ir_variable *u = factory.make_temp(glsl_type::uint_type, >> + "tmp_unpack_uint_to_uvec4_u"); >> + factory.emit(assign(u, uint_rval)); >> + >> + /* uvec4 u4; */ >> + ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type, >> + "tmp_unpack_uint_to_uvec4_u4"); >> + >> + /* u4.x = u & 0xffu; */ >> + factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X)); >> + >> + /* u4.y = (u >> 8u) & 0xffu; */ >> + factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), >> + constant(0xffu)), WRITEMASK_Y)); >> + >> + /* u4.z = (u >> 16u) & 0xffu; */ >> + factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), >> + constant(0xffu)), WRITEMASK_Z)); >> + >> + /* u4.w = (u >> 24u) */ >> + factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W)); >> + >> + return deref(u4).val; >> + } > > As for unpack_uvec4_to_uint(), the three bit-and instructions should be > factored into one. Like this: > > u4.x = u; > u4.y = u >> 8u; > u4.z = u >> 16u; > u4.w = u >> 24u; > u4 = u4 & 0xff; > return u4; > > > >> /** >> + * \brief Lower a packUnorm4x8 expression. >> + * >> + * \param vec4_rval is packUnorm4x8's input >> + * \return packUnorm4x8's output as a uint rvalue >> + */ >> + ir_rvalue* >> + lower_pack_unorm_4x8(ir_rvalue *vec4_rval) >> + { >> + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: >> + * >> + * highp uint packUnorm4x8 (vec4 v) >> + * -------------------------------- >> + * First, converts each component of the normalized floating-point >> value >> + * v into 16-bit integer values. Then, the results are packed into >> the >> + * returned 32-bit unsigned integer. >> + * >> + * The conversion for component c of v to fixed point is done as >> + * follows: >> + * >> + * packUnorm4x8: round(clamp(c, 0, +1) * 65535.0) > ^^^^^^^ > Copy-paste error. Should be 255.0. > > With the copy-paste errors fixed and the instruction reductions applied, > this is > > Reviewed-by: Chad Versace <chad.vers...@linux.intel.com> >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev