Define bitfield packing, unpacking and type conversion operations in
terms of which the image format conversion code will be implemented.
These don't directly know about image formats: the packing and
unpacking functions take a 4-tuple of bit shifts and a 4-tuple of bit
widths as arguments, determining the bitfield position of each
component.  Most of the remaining functions perform integer,
normalized fixed-point, and floating-point type conversions, mapping
between a target type with the per-component bit widths given by a
parameter and a matching native representation of the same type.
---
 src/mesa/drivers/dri/i965/brw_ir_surface_builder.h | 385 +++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_reg.h                |   3 +
 2 files changed, 388 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h b/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h
index e46c7c1..317ad15 100644
--- a/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_ir_surface_builder.h
@@ -719,6 +719,391 @@ namespace brw {
          return dst;
       }
    }
+
+   namespace detail {
+      /**
+       * Simple 4-tuple of scalars used to pass around per-color component
+       * values. It has to be parameterized on T because we need values of a
+       * number of different types.
+       */
+      template<typename T>
+      struct color {
+         color(T x = 0) : r(x), g(x), b(x), a(x)
+         {
+         }
+
+         color(T r, T g, T b, T a) : r(r), g(g), b(b), a(a)
+         {
+         }
+
+         T
+         operator[](unsigned i) const
+         {
+            const T xs[] = { r, g, b, a };
+            return xs[i];
+         }
+
+         T r, g, b, a;
+      };
+
+      /**
+       * Return the bitmask of non-zero components of the given 4-tuple.
+       */
+      template<typename T>
+      unsigned
+      bitmask(const color<T> &c)
+      {
+         return !!c.r << 0 | !!c.g << 1 | !!c.b << 2 | !!c.a << 3;
+      }
+   }
+
+   namespace image_format_conversion {
+      using detail::color;
+
+      namespace detail {
+         /**
+          * Maximum value representable in an \p n bit unsigned integer. Any
+          * n >= 32 saturates to ~0u instead of doing an undefined shift.
+          */
+         inline unsigned
+         scale(unsigned n)
+         {
+            return n >= 32 ? ~0u : (1u << n) - 1;
+         }
+
+         /**
+          * Load a 4-component constant vector into registers.
+          */
+         template<typename T>
+         src_svec4
+         emit_vector_imm(const svec4_builder &bld, const color<T> &c)
+         {
+            const dst_svec4 dst = bld.natural_reg(fs_reg(T()).type);
+
+            for (int i = 0; i < 4; ++i)
+               bld.MOV(writemask(dst, 1 << i), c[i]);
+
+            return dst;
+         }
+
+         /**
+          * Load a 4-component constant vector into registers.
+          *
+          * This SIMD4x2 specialization attempts to use vector immediates and
+          * writemasking to minimize the number of copies. This definitely
+          * belongs in a general vectorization pass rather than here, but we
+          * don't have such a pass yet and we want to generate reasonable code in
+          * the meantime.
+          */
+         template<typename T>
+         src_reg
+         emit_vector_imm(const vec4_builder &bld, const color<T> &c)
+         {
+            const dst_reg dst = bld.natural_reg(src_reg(T()).type);
+
+            if (brw_float_to_vf(c.r) != -1 &&
+                brw_float_to_vf(c.g) != -1 &&
+                brw_float_to_vf(c.b) != -1 &&
+                brw_float_to_vf(c.a) != -1) {
+               bld.MOV(dst, src_reg(brw_float_to_vf(c.r), brw_float_to_vf(c.g),
+                                    brw_float_to_vf(c.b), brw_float_to_vf(c.a)));
+
+            } else {
+               for (unsigned mask_left = dst.writemask; mask_left;) {
+                  const unsigned i = ffs(mask_left) - 1;
+                  const unsigned mask = bitmask(
+                     color<bool>(c.r == c[i], c.g == c[i],
+                                 c.b == c[i], c.a == c[i]));
+
+                  bld.MOV(writemask(dst, mask), c[i]);
+                  mask_left &= ~mask;
+               }
+            }
+
+            return dst;
+         }
+      }
+
+      /**
+       * Pack the components of \p src into a bitfield. \p shifts and \p widths
+       * give each component's bit offset and size; zero-width ones are skipped.
+       */
+      template<typename B, typename S>
+      S
+      emit_pack(const B &bld, const S &src,
+                const color<unsigned> &shifts,
+                const color<unsigned> &widths)
+      {
+         using namespace detail;
+         const unsigned mask = bitmask(widths);
+         const typename B::dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD);
+
+         /* Shift each component left to the correct bitfield position. */
+         bld.SHL(writemask(dst, mask), src,
+                 emit_vector_imm(bld, color<unsigned>(
+                    shifts.r % 32, shifts.g % 32,
+                    shifts.b % 32, shifts.a % 32)));
+
+         /* Combine the shifted components with bitwise ORs. */
+         if (mask >> 1) {
+            assert(shifts.r + widths.r <= 32 && shifts.g + widths.g <= 32 &&
+                   shifts.b + widths.b <= 64 && shifts.a + widths.a <= 64);
+            bld.OR(writemask(dst, WRITEMASK_XY),
+                   swizzle(dst, BRW_SWIZZLE_XZXZ),
+                   swizzle(dst, (mask >> 3 ?
BRW_SWIZZLE_YWYW :
+                                 BRW_SWIZZLE_YZYZ)));
+         }
+
+         if (mask >> 2 && (shifts.b < 32 || shifts.a < 32)) {
+            assert(shifts.b + widths.b <= 32 && shifts.a + widths.a <= 32);
+            bld.OR(writemask(dst, WRITEMASK_X),
+                   swizzle(dst, BRW_SWIZZLE_XXXX),
+                   swizzle(dst, BRW_SWIZZLE_YYYY));
+         }
+
+         return dst;
+      }
+
+      /**
+       * Unpack the components at \p shifts with sizes \p widths from the
+       * bitfield \p src; the result has the same register type as \p src.
+       */
+      template<typename B, typename S>
+      S
+      emit_unpack(const B &bld, const S &src,
+                  const color<unsigned> &shifts,
+                  const color<unsigned> &widths)
+      {
+         using namespace detail;
+         const unsigned mask = bitmask(widths);
+         const typename B::dst_reg dst = bld.natural_reg(src.type);
+
+         /* Shift left to discard the most significant bits. */
+         bld.SHL(writemask(dst, mask),
+                 swizzle(src, BRW_SWIZZLE4(shifts.r / 32, shifts.g / 32,
+                                           shifts.b / 32, shifts.a / 32)),
+                 emit_vector_imm(bld, color<unsigned>(
+                    32 - shifts.r % 32 - widths.r,
+                    32 - shifts.g % 32 - widths.g,
+                    32 - shifts.b % 32 - widths.b,
+                    32 - shifts.a % 32 - widths.a)));
+
+         /* Shift back to the least significant bits using an arithmetic
+          * shift to get sign extension on signed types.
+          */
+         bld.ASR(writemask(dst, mask), dst,
+                 emit_vector_imm(bld, color<unsigned>(
+                    32 - widths.r, 32 - widths.g,
+                    32 - widths.b, 32 - widths.a)));
+
+         return dst;
+      }
+
+      /**
+       * Convert a vector into an integer vector of the specified signedness
+       * and bit widths, clamping values that do not fit in the target range.
+       */
+      template<typename B, typename S>
+      S
+      emit_convert_to_integer(const B &bld, const S &src,
+                              const color<unsigned> &widths,
+                              bool is_signed)
+      {
+         using namespace detail;
+         const unsigned mask = bitmask(widths);
+         const unsigned s = (is_signed ? 1 : 0);
+         const typename B::dst_reg dst = bld.natural_reg(
+            is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD);
+
+         bld.MOV(writemask(dst, mask), src);
+
+         /* Clamp to the minimum value.
*/
+         if (is_signed) {
+            const S min = emit_vector_imm(bld, color<int>(
+               -scale(widths.r - s) - 1, -scale(widths.g - s) - 1,
+               -scale(widths.b - s) - 1, -scale(widths.a - s) - 1));
+            bld.emit_minmax(writemask(dst, mask), dst, min,
+                            BRW_CONDITIONAL_G);
+         }
+
+         /* Clamp to the maximum value. */
+         const S max = emit_vector_imm(bld, color<int>(
+            scale(widths.r - s), scale(widths.g - s),
+            scale(widths.b - s), scale(widths.a - s)));
+         bld.emit_minmax(writemask(dst, mask), dst, max,
+                         BRW_CONDITIONAL_L);
+
+         return dst;
+      }
+
+      /**
+       * Convert a normalized fixed-point vector of the specified signedness
+       * and bit widths into a floating point vector.
+       */
+      template<typename B, typename S>
+      S
+      emit_convert_from_scaled(const B &bld, const S &src,
+                               const color<unsigned> &widths,
+                               bool is_signed)
+      {
+         using namespace detail;
+         const unsigned mask = bitmask(widths);
+         const unsigned s = (is_signed ? 1 : 0);
+         const typename B::dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_F);
+
+         /* Convert to float. */
+         bld.MOV(writemask(dst, mask), src);
+
+         /* Divide by the normalization constants (MUL by the reciprocals). */
+         bld.MUL(writemask(dst, mask), dst,
+                 emit_vector_imm(bld, color<float>(
+                    1.0 / scale(widths.r - s), 1.0 / scale(widths.g - s),
+                    1.0 / scale(widths.b - s), 1.0 / scale(widths.a - s))));
+
+         /* Clamp to the minimum value of -1.0 (signed formats only). */
+         if (is_signed)
+            bld.emit_minmax(writemask(dst, mask), dst, -1.0f,
+                            BRW_CONDITIONAL_G);
+
+         return dst;
+      }
+
+      /**
+       * Convert a floating point vector into a normalized fixed-point vector
+       * of the specified signedness and bit widths.
+       */
+      template<typename B, typename S>
+      S
+      emit_convert_to_scaled(const B &bld, const S &src,
+                             const color<unsigned> &widths,
+                             bool is_signed)
+      {
+         using namespace detail;
+         const unsigned mask = bitmask(widths);
+         const unsigned s = (is_signed ? 1 : 0);
+         const typename B::dst_reg dst = bld.natural_reg(
+            is_signed ?
BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD);
+         const typename B::dst_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+         bld.MOV(writemask(fdst, mask), src);
+
+         /* Clamp to the minimum value of -1.0 (signed formats only). */
+         if (is_signed)
+            bld.emit_minmax(writemask(fdst, mask), fdst, -1.0f,
+                            BRW_CONDITIONAL_G);
+
+         /* Clamp to the maximum value of 1.0. */
+         bld.emit_minmax(writemask(fdst, mask), fdst, 1.0f,
+                         BRW_CONDITIONAL_L);
+
+         /* Multiply by the normalization constants. */
+         bld.MUL(writemask(fdst, mask), fdst,
+                 emit_vector_imm(bld, color<float>(
+                    scale(widths.r - s), scale(widths.g - s),
+                    scale(widths.b - s), scale(widths.a - s))));
+
+         /* Round with RNDE, then convert to integer. */
+         bld.RNDE(writemask(fdst, mask), fdst);
+         bld.MOV(writemask(dst, mask), fdst);
+
+         return dst;
+      }
+
+      /**
+       * Convert a floating point vector of the specified bit widths into a
+       * 32-bit floating point vector.
+       */
+      template<typename B, typename S>
+      S
+      emit_convert_from_float(const B &bld, const S &src,
+                              const color<unsigned> &widths)
+      {
+         using namespace detail;
+         const unsigned mask = bitmask(widths);
+         const unsigned shift_mask = bitmask(
+            color<bool>(widths.r < 16, widths.g < 16,
+                        widths.b < 16, widths.a < 16));
+         const typename B::dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD);
+         const typename B::dst_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+         bld.MOV(writemask(dst, mask), src);
+
+         /* Extend 10-bit and 11-bit floating point numbers to 15 bits.
+          * This works because they have a 5-bit exponent just like the
+          * 16-bit floating point format, and they have no sign bit.
+          */
+         if (shift_mask)
+            bld.SHL(writemask(dst, shift_mask), dst,
+                    emit_vector_imm(bld, color<unsigned>(
+                       15 - widths.r, 15 - widths.g,
+                       15 - widths.b, 15 - widths.a)));
+
+         /* Convert to 32-bit floating point. */
+         bld.F16TO32(writemask(fdst, mask), dst);
+
+         return fdst;
+      }
+
+      /**
+       * Convert a vector into a floating point vector of the specified bit
+       * widths.
+       */
+      template<typename B, typename S>
+      S
+      emit_convert_to_float(const B &bld, const S &src,
+                            const color<unsigned> &widths)
+      {
+         using namespace detail;
+         const unsigned mask = bitmask(widths);
+         const unsigned shift_mask = bitmask(
+            color<bool>(widths.r < 16, widths.g < 16,
+                        widths.b < 16, widths.a < 16));
+         const typename B::dst_reg dst = bld.natural_reg(BRW_REGISTER_TYPE_UD);
+         const typename B::dst_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);
+
+         bld.MOV(writemask(fdst, mask), src);
+
+         /* Clamp components narrower than 16 bits to a minimum of 0.0. */
+         if (shift_mask)
+            bld.emit_minmax(writemask(fdst, shift_mask), fdst, 0.0f,
+                            BRW_CONDITIONAL_G);
+
+         /* Convert to 16-bit floating-point. */
+         bld.F32TO16(writemask(dst, mask), fdst);
+
+         /* Discard the least significant bits to get floating point numbers
+          * of the requested width. This works because the 10-bit and
+          * 11-bit floating point formats have a 5-bit exponent just like
+          * the 16-bit format, and they have no sign bit.
+          */
+         if (shift_mask)
+            bld.SHR(writemask(dst, shift_mask), dst,
+                    emit_vector_imm(bld, color<unsigned>(
+                       15 - widths.r, 15 - widths.g,
+                       15 - widths.b, 15 - widths.a)));
+
+         return dst;
+      }
+
+      /**
+       * Fill missing components of a vector with 0, 0, 0, 1.
+       */
+      template<typename B, typename S>
+      S
+      emit_pad(const B &bld, const S &src, const color<unsigned> &widths)
+      {
+         using namespace detail;
+         const unsigned mask = bitmask(widths);
+         const typename B::dst_reg dst = bld.natural_reg(src.type);
+
+         bld.MOV(writemask(dst, mask), src);
+         if (~mask & dst.writemask) /* Some enabled channel has zero width. */
+            bld.MOV(writemask(dst, ~mask),
+                    emit_vector_imm(bld, color<unsigned>(0, 0, 0, 1)));
+
+         return dst;
+      }
+   }
 }

 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h
index c03a8ae..668f83c 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -82,7 +82,10 @@ struct brw_device_info;
 #define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
 #define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
 #define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
+#define BRW_SWIZZLE_XZXZ BRW_SWIZZLE4(0,2,0,2)
 #define BRW_SWIZZLE_YZXW BRW_SWIZZLE4(1,2,0,3)
+#define BRW_SWIZZLE_YZYZ BRW_SWIZZLE4(1,2,1,2)
+#define BRW_SWIZZLE_YWYW BRW_SWIZZLE4(1,3,1,3)
 #define BRW_SWIZZLE_ZXYW BRW_SWIZZLE4(2,0,1,3)
 #define BRW_SWIZZLE_ZWZW BRW_SWIZZLE4(2,3,2,3)

-- 
2.3.5

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev