I have made some changes to the file pixman-vmx.c, which uses VMX (aka
AltiVec) to optimize pixman. In summary:

- Changed the vec_perm permute vector to xor the byte positions with an
  endianness-dependent value, so the permute works regardless of endianness
  (see the scalar sketch below).
- Replaced vec_mergeh, vec_mergel and vec_mladd with vec_mule and vec_mulo
  plus vec_add and vec_perm. The result is the same and is not affected by
  endianness differences.
- Replaced vec_lvsl with direct unaligned assignment (=), because according
  to the Power ABI Specification the use of lvsl is deprecated on ppc64le.
- Made the COMPUTE_SHIFT_{MASK,MASKS,MASKC} macros no-ops for little-endian
  PowerPC, since unaligned access is supported on ppc64le.

After those changes, all tests passed on ppc64, ppc64le and powerpc VMs.
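For reference, here is a small, self-contained scalar sketch of that index
trick (illustration only, not part of the patch; the pixel value and the
variable names are made up for the demo). It shows why xor-ing a big-endian
byte index with 3 on little endian picks the same logical byte of a 32-bit
pixel:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main (void)
{
    /* One ARGB pixel: alpha 0xFF, red 0x11, green 0x22, blue 0x33. */
    uint32_t pixel = 0xFF112233;
    unsigned char bytes[4];
    memcpy (bytes, &pixel, sizeof pixel);

    /* Same endianness probe as in the patch: c[1] is 3 on little endian
     * and 0 on big endian. */
    union {
        unsigned short s;
        unsigned char  c[2];
    } endian_xor = { 0x0300 };

    /* Big-endian permute index of the alpha byte inside a 4-byte pixel. */
    unsigned char be_index = 0x00;

    /* Xor-ing flips the byte offset within the 4-byte group on little
     * endian and leaves it unchanged on big endian, so the same logical
     * byte (the alpha) is selected either way. */
    unsigned char alpha = bytes[be_index ^ endian_xor.c[1]];

    printf ("alpha = 0x%02X\n", alpha); /* prints 0xFF on either endianness */
    return 0;
}

In the vector code the same xor value is splatted across the whole permute
vector, so every 4-byte pixel in the register gets the correction at once.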
Signed-off-by: Fernando Seiti Furusato <ferse...@linux.vnet.ibm.com>
---
 pixman/pixman-vmx.c | 106 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 67 insertions(+), 39 deletions(-)

diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index c33631c..57918c1 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -37,46 +37,49 @@ static force_inline vector unsigned int
 splat_alpha (vector unsigned int pix)
 {
-    return vec_perm (pix, pix,
-                     (vector unsigned char)AVV (
-                         0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04,
-                         0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C));
+    union {
+        unsigned short s;
+        unsigned char c[2];
+    } endian_xor = {0x0300};
+
+    /* endian_xor.c[1] will be 3 if little endian and 0 if big endian */
+    vector unsigned char perm = vec_splat ((vector unsigned char)
+                                           AVV (endian_xor.c[1]), 0);
+    perm = vec_xor (perm, (vector unsigned char) AVV (
+                        0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04,
+                        0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C));
+    return vec_perm (pix, pix, perm);
 }

 static force_inline vector unsigned int
 pix_multiply (vector unsigned int p, vector unsigned int a)
 {
-    vector unsigned short hi, lo, mod;
-
-    /* unpack to short */
-    hi = (vector unsigned short)
-        vec_mergeh ((vector unsigned char)AVV (0),
-                    (vector unsigned char)p);
-
-    mod = (vector unsigned short)
-        vec_mergeh ((vector unsigned char)AVV (0),
-                    (vector unsigned char)a);
-
-    hi = vec_mladd (hi, mod, (vector unsigned short)
-                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
-                         0x0080, 0x0080, 0x0080, 0x0080));
+    vector unsigned short hi, lo, even, odd;
+
+    /* unpack to short while multiplying p and a even positions */
+    even = vec_mule ((vector unsigned char)p, (vector unsigned char)a);
+    even = vec_add (even, (vector unsigned short)AVV
+                    (0x0080, 0x0080, 0x0080, 0x0080,
+                     0x0080, 0x0080, 0x0080, 0x0080));
+
+    /* unpack to short while multiplying p and a odd positions */
+    odd = vec_mulo ((vector unsigned char)p, (vector unsigned char)a);
+    odd = vec_add (odd, (vector unsigned short)AVV
+                   (0x0080, 0x0080, 0x0080, 0x0080,
+                    0x0080, 0x0080, 0x0080, 0x0080));
+
+    /* change split from even and odd positions to high and low ends */
+    hi = vec_perm (even, odd, (vector unsigned char)AVV
+                   (0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
+                    0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17));
+    lo = vec_perm (even, odd, (vector unsigned char)AVV
+                   (0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B,
+                    0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F));

     hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));

     hi = vec_sr (hi, vec_splat_u16 (8));

-    /* unpack to short */
-    lo = (vector unsigned short)
-        vec_mergel ((vector unsigned char)AVV (0),
-                    (vector unsigned char)p);
-    mod = (vector unsigned short)
-        vec_mergel ((vector unsigned char)AVV (0),
-                    (vector unsigned char)a);
-
-    lo = vec_mladd (lo, mod, (vector unsigned short)
-                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
-                         0x0080, 0x0080, 0x0080, 0x0080));
-
     lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));

     lo = vec_sr (lo, vec_splat_u16 (8));
@@ -129,29 +132,26 @@ over (vector unsigned int src,
     over (pix_multiply (src, mask),                                     \
           pix_multiply (srca, mask), dest)

+#ifdef WORDS_BIGENDIAN

-#define COMPUTE_SHIFT_MASK(source)                                      \
+# define COMPUTE_SHIFT_MASK(source)                                     \
     source ## _mask = vec_lvsl (0, source);

-#define COMPUTE_SHIFT_MASKS(dest, source)                               \
+# define COMPUTE_SHIFT_MASKS(dest, source)                              \
     source ## _mask = vec_lvsl (0, source);

-#define COMPUTE_SHIFT_MASKC(dest, source, mask)                         \
+# define COMPUTE_SHIFT_MASKC(dest, source, mask)                        \
     mask ## _mask = vec_lvsl (0, mask);                                 \
     source ## _mask = vec_lvsl (0, source);

-/* notice you have to declare temp vars...
- * Note: tmp3 and tmp4 must remain untouched!
- */
-
-#define LOAD_VECTORS(dest, source)                        \
+# define LOAD_VECTORS(dest, source)                       \
     tmp1 = (typeof(tmp1))vec_ld (0, source);              \
     tmp2 = (typeof(tmp2))vec_ld (15, source);             \
     v ## source = (typeof(v ## source))                   \
         vec_perm (tmp1, tmp2, source ## _mask);           \
     v ## dest = (typeof(v ## dest))vec_ld (0, dest);

-#define LOAD_VECTORSC(dest, source, mask)                 \
+# define LOAD_VECTORSC(dest, source, mask)                \
     tmp1 = (typeof(tmp1))vec_ld (0, source);              \
     tmp2 = (typeof(tmp2))vec_ld (15, source);             \
     v ## source = (typeof(v ## source))                   \
@@ -162,6 +162,34 @@ over (vector unsigned int src,
     v ## mask = (typeof(v ## mask))                       \
         vec_perm (tmp1, tmp2, mask ## _mask);

+#else //WORDS_BIGENDIAN
+
+/* Now the COMPUTE_SHIFT_{MASK, MASKS, MASKC} below are just no-op.
+ * They are defined that way because little endian altivec can do unaligned
+ * reads natively and have no need for constructing the permutation pattern
+ * variables.
+ */
+# define COMPUTE_SHIFT_MASK(source)
+
+# define COMPUTE_SHIFT_MASKS(dest, source)
+
+# define COMPUTE_SHIFT_MASKC(dest, source, mask)
+
+# define LOAD_VECTORS(dest, source)                       \
+    v ## source = *((typeof(v ## source)*)source);        \
+    v ## dest = *((typeof(v ## dest)*)dest);
+
+# define LOAD_VECTORSC(dest, source, mask)                \
+    v ## source = *((typeof(v ## source)*)source);        \
+    v ## dest = *((typeof(v ## dest)*)dest);              \
+    v ## mask = *((typeof(v ## mask)*)mask);
+
+#endif //WORDS_BIGENDIAN
+
+/* notice you have to declare temp vars...
+ * Note: tmp3 and tmp4 must remain untouched!
+ */
+
 #define LOAD_VECTORSM(dest, source, mask)                               \
     LOAD_VECTORSC (dest, source, mask)                                  \
     v ## source = pix_multiply (v ## source,                            \
-- 
2.1.4