On Wed, Sep 26, 2012 at 1:43 PM, Søren Sandmann <sandm...@cs.au.dk> wrote: > From: Søren Sandmann Pedersen <s...@redhat.com> > > A new struct argb_t containing a floating point pixel is added to > pixman-private.h, and conversion routines are added to pixman-utils.c > to convert normalized integers to and from that struct. > > New functions: > > - pixman_expand_to_float() > Expands a buffer of integer pixels to a buffer of argb_t pixels > > - pixman_contract_from_float() > Converts a buffer of argb_t pixels to a buffer integer pixels > > - pixman_float_to_unorm() > Converts a floating point number to an unsigned normalized integer > > - pixman_unorm_to_float() > Converts an unsigned normalized integer to a floating point number > --- > pixman/pixman-private.h | 35 +++++++++++++++ > pixman/pixman-utils.c | 107 > +++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 142 insertions(+), 0 deletions(-) > > diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h > index c82316f..91f35ed 100644 > --- a/pixman/pixman-private.h > +++ b/pixman/pixman-private.h > @@ -45,6 +45,16 @@ typedef struct radial_gradient radial_gradient_t; > typedef struct bits_image bits_image_t; > typedef struct circle circle_t; > > +typedef struct argb_t argb_t; > + > +struct argb_t > +{ > + float a; > + float r; > + float g; > + float b; > +}; > + > typedef void (*fetch_scanline_t) (pixman_image_t *image, > int x, > int y, > @@ -792,12 +802,34 @@ pixman_expand (uint64_t * dst, > const uint32_t * src, > pixman_format_code_t format, > int width); > +void > +pixman_expand_to_float (argb_t *dst, > + const uint32_t *src, > + pixman_format_code_t format, > + int width); > > void > pixman_contract (uint32_t * dst, > const uint64_t *src, > int width); > > +void > +pixman_contract_from_float (uint32_t *dst, > + const argb_t *src, > + int width); > + > +pixman_bool_t > +_pixman_lookup_composite_function (pixman_implementation_t *toplevel, > + pixman_op_t op, > + pixman_format_code_t 
src_format, > + uint32_t src_flags, > + pixman_format_code_t mask_format, > + uint32_t mask_flags, > + pixman_format_code_t dest_format, > + uint32_t dest_flags, > + pixman_implementation_t **out_imp, > + pixman_composite_func_t *out_func); > + > /* Region Helpers */ > pixman_bool_t > pixman_region32_copy_from_region16 (pixman_region32_t *dst, > @@ -957,6 +989,9 @@ unorm_to_unorm (uint32_t val, int from_bits, int to_bits) > return result; > } > > +uint16_t pixman_float_to_unorm (float f, int n_bits); > +float pixman_unorm_to_float (uint16_t u, int n_bits); > + > /* > * Various debugging code > */ > diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c > index e4a9730..4f9db29 100644 > --- a/pixman/pixman-utils.c > +++ b/pixman/pixman-utils.c > @@ -162,6 +162,113 @@ pixman_expand (uint64_t * dst, > } > } > > +static force_inline uint16_t > +float_to_unorm (float f, int n_bits) > +{ > + uint32_t u; > + > + if (f > 1.0) > + f = 1.0; > + if (f < 0.0) > + f = 0.0; > + > + u = f * (1 << n_bits); > + u -= (u >> n_bits); > + > + return u; > +} > + > +static force_inline float > +unorm_to_float (uint16_t u, int n_bits) > +{ > + uint32_t m = ((1 << n_bits) - 1); > + > + return (u & m) * (1.f / (float)m); > +} > + > +/* > + * This function expands images from a8r8g8b8 to argb_t. To preserve > + * precision, it needs to know from which source format the a8r8g8b8 pixels > + * originally came. > + * > + * For example, if the source was PIXMAN_x1r5g5b5 and the red component > + * contained bits 12345, then the 8-bit value is 12345123. To correctly > + * expand this to floating point, it should be 12345 / 31.0 and not > + * 12345123 / 255.0. 
> + */ > +void > +pixman_expand_to_float (argb_t *dst, > + const uint32_t *src, > + pixman_format_code_t format, > + int width) > +{ > + int a_size, r_size, g_size, b_size; > + int a_shift, r_shift, g_shift, b_shift; > + int i; > + > + if (!PIXMAN_FORMAT_VIS (format)) > + format = PIXMAN_a8r8g8b8; > + > + /* > + * Determine the sizes of each component and the masks and shifts > + * required to extract them from the source pixel. > + */ > + a_size = PIXMAN_FORMAT_A (format); > + r_size = PIXMAN_FORMAT_R (format); > + g_size = PIXMAN_FORMAT_G (format); > + b_size = PIXMAN_FORMAT_B (format); > + > + a_shift = 32 - a_size; > + r_shift = 24 - r_size; > + g_shift = 16 - g_size; > + b_shift = 8 - b_size; > + > + /* Start at the end so that we can do the expansion in place > + * when src == dst > + */ > + for (i = width - 1; i >= 0; i--) > + { > + const uint32_t pixel = src[i]; > + > + dst[i].a = a_size? unorm_to_float (pixel >> a_shift, a_size) : 1.0; > + dst[i].r = r_size? unorm_to_float (pixel >> r_shift, r_size) : 0.0; > + dst[i].g = g_size? unorm_to_float (pixel >> g_shift, g_size) : 0.0; > + dst[i].b = b_size? unorm_to_float (pixel >> b_shift, b_size) : 0.0; > + } > +} > + > +uint16_t > +pixman_float_to_unorm (float f, int n_bits) > +{ > + return float_to_unorm (f, n_bits); > +} > + > +float > +pixman_unorm_to_float (uint16_t u, int n_bits) > +{ > + return unorm_to_float (u, n_bits); > +} > + > +void > +pixman_contract_from_float (uint32_t *dst, > + const argb_t *src, > + int width) > +{ > + int i; > + > + for (i = 0; i < width; ++i) > + { > + uint8_t a, r, g, b; > + > + a = float_to_unorm (src[i].a, 8); > + r = float_to_unorm (src[i].r, 8); > + g = float_to_unorm (src[i].g, 8); > + b = float_to_unorm (src[i].b, 8); > + > + dst[i] = (a << 24) | (r << 16) | (g << 8) | (b << 0); > + } > +} > + > /* > * Contracting is easier than expanding. We just need to truncate the > * components. 
> -- > 1.7.4 > > _______________________________________________ > Pixman mailing list > Pixman@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/pixman
As I'm sure you know, these functions could be implemented with SSE2 or SSE4.1 if we converted 4 pixels at once. How can we override their implementations with optimized ones? An SSE2 expand-to-float could be something like: __m128i vsrc = _mm_loadu_si128 ((const __m128i *) src); /* vsrc = r g b a r g b a r g b a r g b a */ Get the individual components of vsrc (by masking and shifting), such that r = 0 0 0 r 0 0 0 r 0 0 0 r 0 0 0 r g = 0 0 0 g 0 0 0 g 0 0 0 g 0 0 0 g b = 0 0 0 b 0 0 0 b 0 0 0 b 0 0 0 b a = 0 0 0 a 0 0 0 a 0 0 0 a 0 0 0 a Convert to floats with _mm_cvtepi32_ps: __m128 R = _mm_cvtepi32_ps (r); /* - R 1 -:- R 2 -:- R 3 -:- R 4 - */ __m128 G = _mm_cvtepi32_ps (g); /* - G 1 -:- G 2 -:- G 3 -:- G 4 - */ __m128 B = _mm_cvtepi32_ps (b); /* - B 1 -:- B 2 -:- B 3 -:- B 4 - */ __m128 A = _mm_cvtepi32_ps (a); /* - A 1 -:- A 2 -:- A 3 -:- A 4 - */ And finally transpose before storing, so that each register holds one pixel: _MM_TRANSPOSE4_PS (R, G, B, A); Or, with SSE4.1 we can use _mm_cvtepu8_epi32 (note that _mm_srli_si128 shifts by bytes, not bits): __m128i vsrc = _mm_loadu_si128 ((const __m128i *) src); /* vsrc = r g b a r g b a r g b a r g b a */ __m128i pix1 = _mm_cvtepu8_epi32 (vsrc); __m128i pix2 = _mm_cvtepu8_epi32 (_mm_srli_si128 (vsrc, 4)); __m128i pix3 = _mm_cvtepu8_epi32 (_mm_srli_si128 (vsrc, 8)); __m128i pix4 = _mm_cvtepu8_epi32 (_mm_srli_si128 (vsrc, 12)); __m128 fpix1 = _mm_cvtepi32_ps (pix1); __m128 fpix2 = _mm_cvtepi32_ps (pix2); __m128 fpix3 = _mm_cvtepi32_ps (pix3); __m128 fpix4 = _mm_cvtepi32_ps (pix4); Totally untested. Probably has bugs. Waiting for Siarhei to show me how to make this better. :) _______________________________________________ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman