On Thu, Oct 16, 2014 at 10:48:14AM +0800, rongyan wrote:
Hi,
I created a patch to fix the bug in file libswscale/ppc/swscale_altivec.c
for POWER LE. The fixed functions include 'hScale_altivec_real()',
'yuv2planeX_16_altivec()', and 'yuv2planeX_8()'. The fate test results can be
found on http://fate.ffmpeg.org/ by searching for ibmcrl, also attached here to
facilitate the review:
[...]
+#if !HAVE_BIGENDIAN
+#define yuv2planeX_8(d1, d2, l1, src, x, filter) do { \
+vector signed int i1 = vec_mule(filter, l1); \
+vector signed int i2 = vec_mulo(filter, l1); \
+vector signed int vf1 = vec_mergel(i2, i1); \
+vector signed int vf2 = vec_mergeh(i2, i1); \
+d1 = vec_add(d1, vf1); \
+d2 = vec_add(d2, vf2); \
+} while (0)
+#else
#define yuv2planeX_8(d1, d2, l1, src, x, perm, filter) do { \
vector signed short l2 = vec_ld(((x) << 1) + 16, src); \
vector signed short ls = vec_perm(l1, l2, perm); \
@@ -44,11 +54,49 @@
d2 = vec_add(d2, vf2); \
l1 = l2;\
} while (0)
+#endif
static void yuv2planeX_16_altivec(const int16_t *filter, int filterSize,
const int16_t **src, uint8_t *dest,
const uint8_t *dither, int offset, int x)
{
+#if !HAVE_BIGENDIAN
+register int i, j;
+DECLARE_ALIGNED(16, int, val)[16];
+vector signed int vo1, vo2, vo3, vo4;
+vector unsigned short vs1, vs2;
+vector unsigned char vf;
+vector unsigned int altivec_vectorShiftInt19 =
+vec_add(vec_splat_u32(10), vec_splat_u32(9));
+
+for (i = 0; i < 16; i++)
+val[i] = dither[(x + i + offset) & 7] << 12;
+
+vo1 = vec_ld(0, val);
+vo2 = vec_ld(16, val);
+vo3 = vec_ld(32, val);
+vo4 = vec_ld(48, val);
+
+for (j = 0; j < filterSize; j++) {
+vector signed short l1, l2, vLumFilter = vec_vsx_ld(j << 1, filter);
+vLumFilter = vec_splat(vLumFilter, 0); // lumFilter[j] is loaded 8 times in vLumFilter
+
+l1 = vec_vsx_ld(x << 1, src[j]);
+l2 = vec_vsx_ld(((x) << 1) + 16, src[j]);
+
+yuv2planeX_8(vo1, vo2, l1, src[j], x, vLumFilter);
+yuv2planeX_8(vo3, vo4, l2, src[j], x + 8, vLumFilter);
+}
+
+vo1 = vec_sra(vo1, altivec_vectorShiftInt19);
+vo2 = vec_sra(vo2, altivec_vectorShiftInt19);
+vo3 = vec_sra(vo3, altivec_vectorShiftInt19);
+vo4 = vec_sra(vo4, altivec_vectorShiftInt19);
+vs1 = vec_packsu(vo1, vo2);
+vs2 = vec_packsu(vo3, vo4);
+vf = vec_packsu(vs1, vs2);
+vec_vsx_st(vf, 0, dest);
+#else /* else of #if !HAVE_BIGENDIAN */
register int i, j;
DECLARE_ALIGNED(16, int, val)[16];
vector signed int vo1, vo2, vo3, vo4;
code duplication, this is identical to the code in the #if
@@ -86,6 +134,7 @@ static void yuv2planeX_16_altivec(const int16_t *filter,
int filterSize,
vs2 = vec_packsu(vo3, vo4);
vf = vec_packsu(vs1, vs2);
vec_st(vf, 0, dest);
this is identical as well, except for vec_st; the following avoids more
code duplication:
#if HAVE_VSX
#define VEC_ST vec_vsx_st
#else
#define VEC_ST vec_st
#endif
Similarly for the other functions: please don't duplicate code
unless there is a reason.
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Into a blind darkness they enter who follow after the Ignorance,
they as if into a greater darkness enter who devote themselves
to the Knowledge alone. -- Isha Upanishad
signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel