unpack operations

Paul Berry Fri, 25 Jan 2013 07:46:16 -0800

On 24 January 2013 19:47, Matt Turner <matts...@gmail.com> wrote:

> Lower them to arithmetic and bit manipulation expressions.
> ---
>  src/glsl/ir_optimization.h          |    6 +
>  src/glsl/lower_packing_builtins.cpp |  279
> +++++++++++++++++++++++++++++++++++
>  2 files changed, 285 insertions(+), 0 deletions(-)
>
> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
> index ac90b87..8f33018 100644
> --- a/src/glsl/ir_optimization.h
> +++ b/src/glsl/ir_optimization.h
> @@ -54,6 +54,12 @@ enum lower_packing_builtins_op {
>
>     LOWER_PACK_HALF_2x16_TO_SPLIT        = 0x0040,
>     LOWER_UNPACK_HALF_2x16_TO_SPLIT      = 0x0080,
> +
> +   LOWER_PACK_SNORM_4x8                 = 0x0100,
> +   LOWER_UNPACK_SNORM_4x8               = 0x0200,
> +
> +   LOWER_PACK_UNORM_4x8                 = 0x0400,
> +   LOWER_UNPACK_UNORM_4x8               = 0x0800,
>  };
>
>  bool do_common_optimization(exec_list *ir, bool linked,
> diff --git a/src/glsl/lower_packing_builtins.cpp
> b/src/glsl/lower_packing_builtins.cpp
> index 49176cc..aa6765f 100644
> --- a/src/glsl/lower_packing_builtins.cpp
> +++ b/src/glsl/lower_packing_builtins.cpp
> @@ -85,9 +85,15 @@ public:
>        case LOWER_PACK_SNORM_2x16:
>           *rvalue = lower_pack_snorm_2x16(op0);
>           break;
> +      case LOWER_PACK_SNORM_4x8:
> +         *rvalue = lower_pack_snorm_4x8(op0);
> +         break;
>        case LOWER_PACK_UNORM_2x16:
>           *rvalue = lower_pack_unorm_2x16(op0);
>           break;
> +      case LOWER_PACK_UNORM_4x8:
> +         *rvalue = lower_pack_unorm_4x8(op0);
> +         break;
>        case LOWER_PACK_HALF_2x16:
>           *rvalue = lower_pack_half_2x16(op0);
>           break;
> @@ -97,9 +103,15 @@ public:
>        case LOWER_UNPACK_SNORM_2x16:
>           *rvalue = lower_unpack_snorm_2x16(op0);
>           break;
> +      case LOWER_UNPACK_SNORM_4x8:
> +         *rvalue = lower_unpack_snorm_4x8(op0);
> +         break;
>        case LOWER_UNPACK_UNORM_2x16:
>           *rvalue = lower_unpack_unorm_2x16(op0);
>           break;
> +      case LOWER_UNPACK_UNORM_4x8:
> +         *rvalue = lower_unpack_unorm_4x8(op0);
> +         break;
>        case LOWER_UNPACK_HALF_2x16:
>           *rvalue = lower_unpack_half_2x16(op0);
>           break;
> @@ -137,18 +149,30 @@ private:
>        case ir_unop_pack_snorm_2x16:
>           result = op_mask & LOWER_PACK_SNORM_2x16;
>           break;
> +      case ir_unop_pack_snorm_4x8:
> +         result = op_mask & LOWER_PACK_SNORM_4x8;
> +         break;
>        case ir_unop_pack_unorm_2x16:
>           result = op_mask & LOWER_PACK_UNORM_2x16;
>           break;
> +      case ir_unop_pack_unorm_4x8:
> +         result = op_mask & LOWER_PACK_UNORM_4x8;
> +         break;
>        case ir_unop_pack_half_2x16:
>           result = op_mask & (LOWER_PACK_HALF_2x16 |
> LOWER_PACK_HALF_2x16_TO_SPLIT);
>           break;
>        case ir_unop_unpack_snorm_2x16:
>           result = op_mask & LOWER_UNPACK_SNORM_2x16;
>           break;
> +      case ir_unop_unpack_snorm_4x8:
> +         result = op_mask & LOWER_UNPACK_SNORM_4x8;
> +         break;
>        case ir_unop_unpack_unorm_2x16:
>           result = op_mask & LOWER_UNPACK_UNORM_2x16;
>           break;
> +      case ir_unop_unpack_unorm_4x8:
> +         result = op_mask & LOWER_UNPACK_UNORM_4x8;
> +         break;
>        case ir_unop_unpack_half_2x16:
>           result = op_mask & (LOWER_UNPACK_HALF_2x16 |
> LOWER_UNPACK_HALF_2x16_TO_SPLIT);
>           break;
> @@ -214,6 +238,30 @@ private:
>     }
>
>     /**
> +    * \brief Pack four uint8's into a single uint32.
> +    *
> +    * Interpret the given uvec4 as a uint32 quad. Pack the quad into a
> uint32
> +    * where the least significant bits specify the first element of the
> quad.
> +    * Return the uint32.
> +    */
> +   ir_rvalue*
> +   pack_uvec4_to_uint(ir_rvalue *uvec4_rval)
> +   {
> +      assert(uvec4_rval->type == glsl_type::uvec4_type);
> +
> +      /* uvec4 u = UVEC4_RVAL; */
> +      ir_variable *u = factory.make_temp(glsl_type::uvec4_type,
> +                                          "tmp_pack_uvec4_to_uint");
> +      factory.emit(assign(u, uvec4_rval));
>


Rather than do four scalar bit_and(..., constant(0xffu)) instructions
below, how about changing the above line to:

factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu))));

That way we take advantage of vector processing in the GPU to do all four
bit_ands at once.

With that fixed (as well as the copy/paste errors Ian spotted), this patch
is:

Reviewed-by: Paul Berry <stereotype...@gmail.com>


> +
> +      /* return ((u.w & 0xff) << 24) | ((u.z & 0xff) << 16) | ((u.y & 0xff)
> << 8) | (u.x & 0xff); */
> +      return bit_or(bit_or(lshift(bit_and(swizzle_w(u), constant(0xffu)),
> constant(24u)),
> +                           lshift(bit_and(swizzle_z(u), constant(0xffu)),
> constant(16u))),
> +                    bit_or(lshift(bit_and(swizzle_y(u), constant(0xffu)),
> constant(8u)),
> +                           bit_and(swizzle_x(u), constant(0xffu))));
> +   }
> +
> +   /**
>      * \brief Unpack a uint32 into two uint16's.
>      *
>      * Interpret the given uint32 as a uint16 pair where the uint32's least
> @@ -244,6 +292,44 @@ private:
>     }
>
>     /**
> +    * \brief Unpack a uint32 into four uint8's.
> +    *
> +    * Interpret the given uint32 as a uint8 quad where the uint32's least
> +    * significant bits specify the quad's first element. Return the uint8
> +    * quad as a uvec4.
> +    */
> +   ir_rvalue*
> +   unpack_uint_to_uvec4(ir_rvalue *uint_rval)
> +   {
> +      assert(uint_rval->type == glsl_type::uint_type);
> +
> +      /* uint u = UINT_RVAL; */
> +      ir_variable *u = factory.make_temp(glsl_type::uint_type,
> +                                          "tmp_unpack_uint_to_uvec4_u");
> +      factory.emit(assign(u, uint_rval));
> +
> +      /* uvec4 u4; */
> +      ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type,
> +                                           "tmp_unpack_uint_to_uvec4_u4");
> +
> +      /* u4.x = u & 0xffu; */
> +      factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X));
> +
> +      /* u4.y = (u >> 8u) & 0xffu; */
> +      factory.emit(assign(u4, bit_and(rshift(u, constant(8u)),
> +                                      constant(0xffu)), WRITEMASK_Y));
> +
> +      /* u4.z = (u >> 16u) & 0xffu; */
> +      factory.emit(assign(u4, bit_and(rshift(u, constant(16u)),
> +                                      constant(0xffu)), WRITEMASK_Z));
> +
> +      /* u4.w = (u >> 24u) */
> +      factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W));
> +
> +      return deref(u4).val;
> +   }
> +
> +   /**
>      * \brief Lower a packSnorm2x16 expression.
>      *
>      * \param vec2_rval is packSnorm2x16's input
> @@ -293,6 +379,55 @@ private:
>     }
>
>     /**
> +    * \brief Lower a packSnorm4x8 expression.
> +    *
> +    * \param vec4_rval is packSnorm4x8's input
> +    * \return packSnorm4x8's output as a uint rvalue
> +    */
> +   ir_rvalue*
> +   lower_pack_snorm_4x8(ir_rvalue *vec4_rval)
> +   {
> +      /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
> +       *
> +       *    highp uint packSnorm4x8(vec4 v)
> +       *    -------------------------------
> +       *    First, converts each component of the normalized
> floating-point value
> +       *    v into 8-bit integer values. Then, the results are packed
> into the
> +       *    returned 32-bit unsigned integer.
> +       *
> +       *    The conversion for component c of v to fixed point is done as
> +       *    follows:
> +       *
> +       *       packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
> +       *
> +       *    The first component of the vector will be written to the least
> +       *    significant bits of the output; the last component will be
> written to
> +       *    the most significant bits.
> +       *
> +       * This function generates IR that approximates the following
> pseudo-GLSL:
> +       *
> +       *     return pack_uvec4_to_uint(
> +       *         uvec4(ivec4(
> +       *           round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f))));
> +       *
> +       * It is necessary to first convert the vec4 to ivec4 rather than
> directly
> +       * converting vec4 to uvec4 because the latter conversion is
> undefined.
> +       * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined
> to
> +       * convert a negative floating point value to an uint".
> +       */
> +      assert(vec4_rval->type == glsl_type::vec4_type);
> +
> +      ir_rvalue *result = pack_uvec4_to_uint(
> +            i2u(f2i(round_even(mul(clamp(vec4_rval,
> +                                         constant(-1.0f),
> +                                         constant(1.0f)),
> +                                   constant(127.0f))))));
> +
> +      assert(result->type == glsl_type::uint_type);
> +      return result;
> +   }
> +
> +   /**
>      * \brief Lower an unpackSnorm2x16 expression.
>      *
>      * \param uint_rval is unpackSnorm2x16's input
> @@ -352,6 +487,65 @@ private:
>     }
>
>     /**
> +    * \brief Lower an unpackSnorm4x8 expression.
> +    *
> +    * \param uint_rval is unpackSnorm4x8's input
> +    * \return unpackSnorm4x8's output as a vec4 rvalue
> +    */
> +   ir_rvalue*
> +   lower_unpack_snorm_4x8(ir_rvalue *uint_rval)
> +   {
> +      /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
> +       *
> +       *    highp vec4 unpackSnorm4x8 (highp uint p)
> +       *    ----------------------------------------
> +       *    First, unpacks a single 32-bit unsigned integer p into four
> +       *    8-bit unsigned integers. Then, each component is converted to
> +       *    a normalized floating-point value to generate the returned
> +       *    four-component vector.
> +       *
> +       *    The conversion for unpacked fixed-point value f to floating
> point is
> +       *    done as follows:
> +       *
> +       *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
> +       *
> +       *    The first component of the returned vector will be extracted
> from the
> +       *    least significant bits of the input; the last component will
> be
> +       *    extracted from the most significant bits.
> +       *
> +       * This function generates IR that approximates the following
> pseudo-GLSL:
> +       *
> +       *    return clamp(
> +       *       ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) /
> 127.0f,
> +       *       -1.0f, 1.0f);
> +       *
> +       * The above IR may appear unnecessarily complex, but the
> intermediate
> +       * conversion to ivec4 and the bit shifts are necessary to
> correctly unpack
> +       * negative floats.
> +       *
> +       * To see why, consider packing and then unpacking vec4(-1.0, 0.0,
> 0.0,
> +       * 0.0). packSnorm4x8 encodes -1.0 as the int8 0xff. During
> unpacking, we
> +       * place that int8 into an int32, which results in the *positive*
> integer
> +       * 0x000000ff.  The int8's sign bit becomes, in the int32, the
> rather
> +       * unimportant bit 8. We must now extend the int8's sign bit into
> bits
> +       * 9-32, which is accomplished by left-shifting then right-shifting.
> +       */
> +
> +      assert(uint_rval->type == glsl_type::uint_type);
> +
> +      ir_rvalue *result =
> +        clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)),
> +                                    constant(24u)),
> +                             constant(24u))),
> +                  constant(127.0f)),
> +              constant(-1.0f),
> +              constant(1.0f));
> +
> +      assert(result->type == glsl_type::vec4_type);
> +      return result;
> +   }
> +
> +   /**
>      * \brief Lower a packUnorm2x16 expression.
>      *
>      * \param vec2_rval is packUnorm2x16's input
> @@ -396,6 +590,50 @@ private:
>     }
>
>     /**
> +    * \brief Lower a packUnorm4x8 expression.
> +    *
> +    * \param vec4_rval is packUnorm4x8's input
> +    * \return packUnorm4x8's output as a uint rvalue
> +    */
> +   ir_rvalue*
> +   lower_pack_unorm_4x8(ir_rvalue *vec4_rval)
> +   {
> +      /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
> +       *
> +       *    highp uint packUnorm4x8 (vec4 v)
> +       *    --------------------------------
> +       *    First, converts each component of the normalized
> floating-point value
> +       *    v into 8-bit integer values. Then, the results are packed
> into the
> +       *    returned 32-bit unsigned integer.
> +       *
> +       *    The conversion for component c of v to fixed point is done as
> +       *    follows:
> +       *
> +       *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
> +       *
> +       *    The first component of the vector will be written to the least
> +       *    significant bits of the output; the last component will be
> written to
> +       *    the most significant bits.
> +       *
> +       * This function generates IR that approximates the following
> pseudo-GLSL:
> +       *
> +       *     return pack_uvec4_to_uint(uvec4(
> +       *                round(clamp(VEC4_RVALUE, 0.0f, 1.0f) * 255.0f)));
> +       *
> +       * Here it is safe to directly convert the vec4 to uvec4 because
> the
> +       * vec4 has been clamped to a non-negative range.
> +       */
> +
> +      assert(vec4_rval->type == glsl_type::vec4_type);
> +
> +      ir_rvalue *result = pack_uvec4_to_uint(
> +         f2u(round_even(mul(saturate(vec4_rval), constant(255.0f)))));
> +
> +      assert(result->type == glsl_type::uint_type);
> +      return result;
> +   }
> +
> +   /**
>      * \brief Lower an unpackUnorm2x16 expression.
>      *
>      * \param uint_rval is unpackUnorm2x16's input
> @@ -437,6 +675,47 @@ private:
>     }
>
>     /**
> +    * \brief Lower an unpackUnorm4x8 expression.
> +    *
> +    * \param uint_rval is unpackUnorm4x8's input
> +    * \return unpackUnorm4x8's output as a vec4 rvalue
> +    */
> +   ir_rvalue*
> +   lower_unpack_unorm_4x8(ir_rvalue *uint_rval)
> +   {
> +      /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
> +       *
> +       *    highp vec4 unpackUnorm4x8 (highp uint p)
> +       *    ----------------------------------------
> +       *    First, unpacks a single 32-bit unsigned integer p into four
> +       *    8-bit unsigned integers. Then, each component is converted to
> +       *    a normalized floating-point value to generate the returned
> +       *    two-component vector.
> +       *
> +       *    The conversion for unpacked fixed-point value f to floating
> point is
> +       *    done as follows:
> +       *
> +       *       unpackUnorm4x8: f / 255.0
> +       *
> +       *    The first component of the returned vector will be extracted
> from the
> +       *    least significant bits of the input; the last component will
> be
> +       *    extracted from the most significant bits.
> +       *
> +       * This function generates IR that approximates the following
> pseudo-GLSL:
> +       *
> +       *     return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0;
> +       */
> +
> +      assert(uint_rval->type == glsl_type::uint_type);
> +
> +      ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)),
> +                              constant(255.0f));
> +
> +      assert(result->type == glsl_type::vec4_type);
> +      return result;
> +   }
> +
> +   /**
>      * \brief Lower the component-wise calculation of packHalf2x16.
>      *
>      * \param f_rval is one component of packHafl2x16's input
> --
> 1.7.8.6
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 5/8] glsl: Add support for lowering 4x8 pack/unpack operations

Reply via email to