On Thu, Oct 16, 2014 at 10:48:14AM +0800, rongyan wrote: > Hi, > I created a patch to fix the bug in file libswscale/ppc/swscale_altivec.c > for POWER LE. The fixed functions including 'hScale_altivec_real()', > 'yuv2planeX_16_altivec()', and 'yuv2planeX_8()'. The fate test result can be > found on http://fate.ffmpeg.org/ by search "ibmcrl", also attached here to > facilitate the review:
[...] > +#if !HAVE_BIGENDIAN > +#define yuv2planeX_8(d1, d2, l1, src, x, filter) do { \ > + vector signed int i1 = vec_mule(filter, l1); \ > + vector signed int i2 = vec_mulo(filter, l1); \ > + vector signed int vf1 = vec_mergel(i2, i1); \ > + vector signed int vf2 = vec_mergeh(i2, i1); \ > + d1 = vec_add(d1, vf1); \ > + d2 = vec_add(d2, vf2); \ > + } while (0) > +#else > #define yuv2planeX_8(d1, d2, l1, src, x, perm, filter) do { \ > vector signed short l2 = vec_ld(((x) << 1) + 16, src); \ > vector signed short ls = vec_perm(l1, l2, perm); \ > @@ -44,11 +54,49 @@ > d2 = vec_add(d2, vf2); \ > l1 = l2; \ > } while (0) > +#endif > > static void yuv2planeX_16_altivec(const int16_t *filter, int filterSize, > const int16_t **src, uint8_t *dest, > const uint8_t *dither, int offset, int x) > { > +#if !HAVE_BIGENDIAN > + register int i, j; > + DECLARE_ALIGNED(16, int, val)[16]; > + vector signed int vo1, vo2, vo3, vo4; > + vector unsigned short vs1, vs2; > + vector unsigned char vf; > + vector unsigned int altivec_vectorShiftInt19 = > + vec_add(vec_splat_u32(10), vec_splat_u32(9)); > + > + for (i = 0; i < 16; i++) > + val[i] = dither[(x + i + offset) & 7] << 12; > + > + vo1 = vec_ld(0, val); > + vo2 = vec_ld(16, val); > + vo3 = vec_ld(32, val); > + vo4 = vec_ld(48, val); > + > + for (j = 0; j < filterSize; j++) { > + vector signed short l1, l2, vLumFilter = vec_vsx_ld(j << 1, filter); > + vLumFilter = vec_splat(vLumFilter, 0); // lumFilter[j] is loaded 8 > times in vLumFilter > + > + l1 = vec_vsx_ld(x << 1, src[j]); > + l2 = vec_vsx_ld(((x) << 1) + 16, src[j]); > + > + yuv2planeX_8(vo1, vo2, l1, src[j], x, vLumFilter); > + yuv2planeX_8(vo3, vo4, l2, src[j], x + 8, vLumFilter); > + } > + > + vo1 = vec_sra(vo1, altivec_vectorShiftInt19); > + vo2 = vec_sra(vo2, altivec_vectorShiftInt19); > + vo3 = vec_sra(vo3, altivec_vectorShiftInt19); > + vo4 = vec_sra(vo4, altivec_vectorShiftInt19); > + vs1 = vec_packsu(vo1, vo2); > + vs2 = vec_packsu(vo3, vo4); > + vf = vec_packsu(vs1, 
vs2); > + vec_vsx_st(vf, 0, dest); > +#else /* else of #if !HAVE_BIGENDIAN */ > register int i, j; > DECLARE_ALIGNED(16, int, val)[16]; > vector signed int vo1, vo2, vo3, vo4; Code duplication: this is identical to the code in the #if branch. > @@ -86,6 +134,7 @@ static void yuv2planeX_16_altivec(const int16_t *filter, > int filterSize, > vs2 = vec_packsu(vo3, vo4); > vf = vec_packsu(vs1, vs2); > vec_st(vf, 0, dest); This is identical as well, except for vec_st; the following avoids more code duplication: #if HAVE_VSX # define VEC_ST vec_vsx_st #else # define VEC_ST vec_st #endif Similarly for the other functions; please don't duplicate code unless there is a reason. [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Into a blind darkness they enter who follow after the Ignorance, they as if into a greater darkness enter who devote themselves to the Knowledge alone. -- Isha Upanishad
signature.asc
Description: Digital signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel