----- Original Message ----- > From: "Bill Schmidt" <[email protected]> > To: "Hal Finkel" <[email protected]> > Cc: [email protected] > Sent: Friday, June 6, 2014 11:32:29 AM > Subject: Re: r210340 - [PPC64LE] Implement little-endian semantics for > vec_pack family > > Hm, that probably isn't true. I suppose the test will "pass" by > producing the wrong (big-endian) code generation, so this is a > reasonable approach. > > I'll look into it.
Okay, thanks. Having the tests with the commits is almost always better, so if it is feasible, then I'd prefer it. Thanks again, Hal > > Bill > > On Fri, 2014-06-06 at 11:07 -0500, Hal Finkel wrote: > > ----- Original Message ----- > > > From: "Bill Schmidt" <[email protected]> > > > To: [email protected] > > > Sent: Friday, June 6, 2014 10:10:47 AM > > > Subject: r210340 - [PPC64LE] Implement little-endian semantics > > > for vec_pack family > > > > > > Author: wschmidt > > > Date: Fri Jun 6 10:10:47 2014 > > > New Revision: 210340 > > > > > > URL: http://llvm.org/viewvc/llvm-project?rev=210340&view=rev > > > Log: > > > [PPC64LE] Implement little-endian semantics for vec_pack family > > > > > > The PowerPC vector-pack instructions are defined architecturally > > > with > > > a big-endian bias, in that the vector element numbering is > > > assumed to > > > be "left to right" regardless of whether the processor is in > > > big-endian or little-endian mode. This definition is unnatural > > > for > > > little-endian code generation. > > > > > > To facilitate ease of porting, the vec_pack and related > > > interfaces > > > are > > > designed to use natural element ordering, so that elements are > > > numbered according to little-endian design principles when code > > > is > > > generated for a little-endian target. The vec_pack calls are > > > implemented as calls to vec_perm, specifying selection of the > > > odd-numbered vector elements. For little endian, this means the > > > odd-numbered elements counting from the right end of the > > > register. > > > Since the underlying instructions count from the left end, we > > > must > > > instead select the even-numbered vector elements for little > > > endian to > > > achieve the desired semantics. > > > > > > The correctness of this code is tested by the new pack.c test > > > added > > > in > > > a previous patch. I plan to later make the existing ppc32 > > > Altivec > > > compile-time tests work for ppc64 and ppc64le as well. > > > > I don't understand this... > > > > test/CodeGen/builtins-ppc-altivec.c seems to be the relevant place > > to test this, and this test works fine on ppc64 (if you change the > > current triple from powerpc-unknown-unknown to > > powerpc64-unknown-unknown it still passes). Please do this and add > > tests for these changes there. > > > > Thanks again, > > Hal > > > > > > > > Modified: > > > cfe/trunk/lib/Headers/altivec.h > > > > > > Modified: cfe/trunk/lib/Headers/altivec.h > > > URL: > > > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/altivec.h?rev=210340&r1=210339&r2=210340&view=diff > > > ============================================================================== > > > --- cfe/trunk/lib/Headers/altivec.h (original) > > > +++ cfe/trunk/lib/Headers/altivec.h Fri Jun 6 10:10:47 2014 > > > @@ -4117,52 +4117,91 @@ vec_vor(vector float __a, vector bool in > > > > > > /* vec_pack */ > > > > > > +/* The various vector pack instructions have a big-endian bias, > > > so > > > for > > > + little endian we must handle reversed element numbering. */ > > > + > > > static vector signed char __ATTRS_o_ai > > > vec_pack(vector signed short __a, vector signed short __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return (vector signed char)vec_perm(__a, __b, (vector unsigned > > > char) > > > + (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, > > > + 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E)); > > > +#else > > > return (vector signed char)vec_perm(__a, __b, (vector unsigned > > > char) > > > (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, > > > 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); > > > +#endif > > > } > > > > > > static vector unsigned char __ATTRS_o_ai > > > vec_pack(vector unsigned short __a, vector unsigned short __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return (vector unsigned char)vec_perm(__a, __b, (vector > > > unsigned > > > char) > > > + (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, > > > + 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E)); > > > +#else > > > return (vector unsigned char)vec_perm(__a, __b, (vector > > > unsigned > > > char) > > > (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, > > > 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); > > > +#endif > > > } > > > > > > static vector bool char __ATTRS_o_ai > > > vec_pack(vector bool short __a, vector bool short __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return (vector bool char)vec_perm(__a, __b, (vector unsigned > > > char) > > > + (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, > > > + 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E)); > > > +#else > > > return (vector bool char)vec_perm(__a, __b, (vector unsigned > > > char) > > > (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, > > > 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); > > > +#endif > > > } > > > > > > static vector short __ATTRS_o_ai > > > vec_pack(vector int __a, vector int __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return (vector short)vec_perm(__a, __b, (vector unsigned char) > > > + (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D, > > > + 0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D)); > > > +#else > > > return (vector short)vec_perm(__a, __b, (vector unsigned char) > > > (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, > > > 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); > > > +#endif > > > } > > > > > > static vector unsigned short __ATTRS_o_ai > > > vec_pack(vector unsigned int __a, vector unsigned int __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return (vector unsigned short)vec_perm(__a, __b, (vector > > > unsigned > > > char) > > > + (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D, > > > + 0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D)); > > > +#else > > > return (vector unsigned short)vec_perm(__a, __b, (vector > > > unsigned > > > char) > > > (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, > > > 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); > > > +#endif > > > } > > > > > > static vector bool short __ATTRS_o_ai > > > vec_pack(vector bool int __a, vector bool int __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return (vector unsigned short)vec_perm(__a, __b, (vector > > > unsigned > > > char) > > > + (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D, > > > + 0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D)); > > > +#else > > > return (vector bool short)vec_perm(__a, __b, (vector unsigned > > > char) > > > (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, > > > 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); > > > +#endif > > > } > > > > > > /* vec_vpkuhum */ > > > @@ -4172,25 +4211,43 @@ vec_pack(vector bool int __a, vector boo > > > static vector signed char __ATTRS_o_ai > > > vec_vpkuhum(vector signed short __a, vector signed short __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return (vector signed char)vec_perm(__a, __b, (vector unsigned > > > char) > > > + (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, > > > + 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E)); > > > +#else > > > return (vector signed char)vec_perm(__a, __b, (vector unsigned > > > char) > > > (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, > > > 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); > > > +#endif > > > } > > > > > > static vector unsigned char __ATTRS_o_ai > > > vec_vpkuhum(vector unsigned short __a, vector unsigned short > > > __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return (vector unsigned char)vec_perm(__a, __b, (vector > > > unsigned > > > char) > > > + (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, > > > + 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E)); > > > +#else > > > return (vector unsigned char)vec_perm(__a, __b, (vector > > > unsigned > > > char) > > > (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, > > > 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); > > > +#endif > > > } > > > > > > static vector bool char __ATTRS_o_ai > > > vec_vpkuhum(vector bool short __a, vector bool short __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return (vector bool char)vec_perm(__a, __b, (vector unsigned > > > char) > > > + (0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, > > > + 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E)); > > > +#else > > > return (vector bool char)vec_perm(__a, __b, (vector unsigned > > > char) > > > (0x01, 0x03, 0x05, 0x07, 0x09, 0x0B, 0x0D, 0x0F, > > > 0x11, 0x13, 0x15, 0x17, 0x19, 0x1B, 0x1D, 0x1F)); > > > +#endif > > > } > > > > > > /* vec_vpkuwum */ > > > @@ -4200,25 +4257,43 @@ vec_vpkuhum(vector bool short __a, vecto > > > static vector short __ATTRS_o_ai > > > vec_vpkuwum(vector int __a, vector int __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return (vector short)vec_perm(__a, __b, (vector unsigned char) > > > + (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D, > > > + 0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D)); > > > +#else > > > return (vector short)vec_perm(__a, __b, (vector unsigned char) > > > (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, > > > 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); > > > +#endif > > > } > > > > > > static vector unsigned short __ATTRS_o_ai > > > vec_vpkuwum(vector unsigned int __a, vector unsigned int __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return (vector unsigned short)vec_perm(__a, __b, (vector > > > unsigned > > > char) > > > + (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D, > > > + 0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D)); > > > +#else > > > return (vector unsigned short)vec_perm(__a, __b, (vector > > > unsigned > > > char) > > > (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, > > > 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); > > > +#endif > > > } > > > > > > static vector bool short __ATTRS_o_ai > > > vec_vpkuwum(vector bool int __a, vector bool int __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return (vector bool short)vec_perm(__a, __b, (vector unsigned > > > char) > > > + (0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0C, 0x0D, > > > + 0x10, 0x11, 0x14, 0x15, 0x18, 0x19, 0x1C, 0x1D)); > > > +#else > > > return (vector bool short)vec_perm(__a, __b, (vector unsigned > > > char) > > > (0x02, 0x03, 0x06, 0x07, 0x0A, 0x0B, 0x0E, 0x0F, > > > 0x12, 0x13, 0x16, 0x17, 0x1A, 0x1B, 0x1E, 0x1F)); > > > +#endif > > > } > > > > > > /* vec_packpx */ > > > @@ -4226,7 +4301,11 @@ vec_vpkuwum(vector bool int __a, vector > > > static vector pixel __attribute__((__always_inline__)) > > > vec_packpx(vector unsigned int __a, vector unsigned int __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return (vector pixel)__builtin_altivec_vpkpx(__b, __a); > > > +#else > > > return (vector pixel)__builtin_altivec_vpkpx(__a, __b); > > > +#endif > > > } > > > > > > /* vec_vpkpx */ > > > @@ -4234,7 +4313,11 @@ vec_packpx(vector unsigned int __a, vect > > > static vector pixel __attribute__((__always_inline__)) > > > vec_vpkpx(vector unsigned int __a, vector unsigned int __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return (vector pixel)__builtin_altivec_vpkpx(__b, __a); > > > +#else > > > return (vector pixel)__builtin_altivec_vpkpx(__a, __b); > > > +#endif > > > } > > > > > > /* vec_packs */ > > > @@ -4242,25 +4325,41 @@ vec_vpkpx(vector unsigned int __a, vecto > > > static vector signed char __ATTRS_o_ai > > > vec_packs(vector short __a, vector short __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return __builtin_altivec_vpkshss(__b, __a); > > > +#else > > > return __builtin_altivec_vpkshss(__a, __b); > > > +#endif > > > } > > > > > > static vector unsigned char __ATTRS_o_ai > > > vec_packs(vector unsigned short __a, vector unsigned short __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return __builtin_altivec_vpkuhus(__b, __a); > > > +#else > > > return __builtin_altivec_vpkuhus(__a, __b); > > > +#endif > > > } > > > > > > static vector signed short __ATTRS_o_ai > > > vec_packs(vector int __a, vector int __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return __builtin_altivec_vpkswss(__b, __a); > > > +#else > > > return __builtin_altivec_vpkswss(__a, __b); > > > +#endif > > > } > > > > > > static vector unsigned short __ATTRS_o_ai > > > vec_packs(vector unsigned int __a, vector unsigned int __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return __builtin_altivec_vpkuwus(__b, __a); > > > +#else > > > return __builtin_altivec_vpkuwus(__a, __b); > > > +#endif > > > } > > > > > > /* vec_vpkshss */ > > > @@ -4268,7 +4367,11 @@ vec_packs(vector unsigned int __a, vecto > > > static vector signed char __attribute__((__always_inline__)) > > > vec_vpkshss(vector short __a, vector short __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return __builtin_altivec_vpkshss(__b, __a); > > > +#else > > > return __builtin_altivec_vpkshss(__a, __b); > > > +#endif > > > } > > > > > > /* vec_vpkuhus */ > > > @@ -4276,7 +4379,11 @@ vec_vpkshss(vector short __a, vector sho > > > static vector unsigned char __attribute__((__always_inline__)) > > > vec_vpkuhus(vector unsigned short __a, vector unsigned short > > > __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return __builtin_altivec_vpkuhus(__b, __a); > > > +#else > > > return __builtin_altivec_vpkuhus(__a, __b); > > > +#endif > > > } > > > > > > /* vec_vpkswss */ > > > @@ -4284,7 +4391,11 @@ vec_vpkuhus(vector unsigned short __a, v > > > static vector signed short __attribute__((__always_inline__)) > > > vec_vpkswss(vector int __a, vector int __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return __builtin_altivec_vpkswss(__b, __a); > > > +#else > > > return __builtin_altivec_vpkswss(__a, __b); > > > +#endif > > > } > > > > > > /* vec_vpkuwus */ > > > @@ -4292,7 +4403,11 @@ vec_vpkswss(vector int __a, vector int _ > > > static vector unsigned short __attribute__((__always_inline__)) > > > vec_vpkuwus(vector unsigned int __a, vector unsigned int __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return __builtin_altivec_vpkuwus(__b, __a); > > > +#else > > > return __builtin_altivec_vpkuwus(__a, __b); > > > +#endif > > > } > > > > > > /* vec_packsu */ > > > @@ -4300,25 +4415,41 @@ vec_vpkuwus(vector unsigned int __a, vec > > > static vector unsigned char __ATTRS_o_ai > > > vec_packsu(vector short __a, vector short __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return __builtin_altivec_vpkshus(__b, __a); > > > +#else > > > return __builtin_altivec_vpkshus(__a, __b); > > > +#endif > > > } > > > > > > static vector unsigned char __ATTRS_o_ai > > > vec_packsu(vector unsigned short __a, vector unsigned short __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return __builtin_altivec_vpkuhus(__b, __a); > > > +#else > > > return __builtin_altivec_vpkuhus(__a, __b); > > > +#endif > > > } > > > > > > static vector unsigned short __ATTRS_o_ai > > > vec_packsu(vector int __a, vector int __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return __builtin_altivec_vpkswus(__b, __a); > > > +#else > > > return __builtin_altivec_vpkswus(__a, __b); > > > +#endif > > > } > > > > > > static vector unsigned short __ATTRS_o_ai > > > vec_packsu(vector unsigned int __a, vector unsigned int __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return __builtin_altivec_vpkuwus(__b, __a); > > > +#else > > > return __builtin_altivec_vpkuwus(__a, __b); > > > +#endif > > > } > > > > > > /* vec_vpkshus */ > > > @@ -4326,13 +4457,21 @@ vec_packsu(vector unsigned int __a, vect > > > static vector unsigned char __ATTRS_o_ai > > > vec_vpkshus(vector short __a, vector short __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return __builtin_altivec_vpkshus(__b, __a); > > > +#else > > > return __builtin_altivec_vpkshus(__a, __b); > > > +#endif > > > } > > > > > > static vector unsigned char __ATTRS_o_ai > > > vec_vpkshus(vector unsigned short __a, vector unsigned short > > > __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return __builtin_altivec_vpkuhus(__b, __a); > > > +#else > > > return __builtin_altivec_vpkuhus(__a, __b); > > > +#endif > > > } > > > > > > /* vec_vpkswus */ > > > @@ -4340,13 +4479,21 @@ vec_vpkshus(vector unsigned short __a, v > > > static vector unsigned short __ATTRS_o_ai > > > vec_vpkswus(vector int __a, vector int __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return __builtin_altivec_vpkswus(__b, __a); > > > +#else > > > return __builtin_altivec_vpkswus(__a, __b); > > > +#endif > > > } > > > > > > static vector unsigned short __ATTRS_o_ai > > > vec_vpkswus(vector unsigned int __a, vector unsigned int __b) > > > { > > > +#ifdef __LITTLE_ENDIAN__ > > > + return __builtin_altivec_vpkuwus(__b, __a); > > > +#else > > > return __builtin_altivec_vpkuwus(__a, __b); > > > +#endif > > > } > > > > > > /* vec_perm */ > > > > > > > > > _______________________________________________ > > > cfe-commits mailing list > > > [email protected] > > > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits > > > > > > > -- Hal Finkel Assistant Computational Scientist Leadership Computing Facility Argonne National Laboratory _______________________________________________ cfe-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
