Am Samstag 09 September 2006 12:01 schrieb Matthias Schwarzott: > I did it another way: bisecting like Prakash Punnoor suggested. > Now I found out that cSoftOsd::ScaleDownHoriz_MMX from SoftOsd.c causes the > problem. > If compiling that part with -O2 it does not work and with -O0 it works.
Nice. Attached is a hackish patch that converts the inline asm to intrinsics. Could you test it? It works for me when I play 720p files via softplay. Beware that it will only compile using gcc >4. If the devs are OK with converting inline asm to intrinsics, I could clean it up a bit and add support for gcc <4 as well. Cheers, -- (°= =°) //\ Prakash Punnoor /\\ V_/ \_V
Index: SoftOsd.c
===================================================================
RCS file: /cvsroot/softdevice/softdevice/SoftOsd.c,v
retrieving revision 1.16
diff -u -r1.16 SoftOsd.c
--- SoftOsd.c 10 Jul 2006 18:23:28 -0000 1.16
+++ SoftOsd.c 9 Sep 2006 11:55:07 -0000
@@ -1372,6 +1372,14 @@
unsigned int g_sum=0;
unsigned int r_sum=0;
unsigned int c;
+#else
+#define USE_INTRINSICS
+ typedef int v2si __attribute__ ((vector_size (8)));
+ v2si v6;
+ v2si v1;
+ v2si v2;
+ v2si v0;
+ v2si v7;
#endif
uint32_t new_pixel_width=(OSD_WIDTH*ScaleFactor)/dest_Width;
uint32_t new_pixel_width_rec=(dest_Width*ScaleFactor)/OSD_WIDTH;
@@ -1379,12 +1387,19 @@
int32_t pos=new_pixel_width;
#ifdef USE_MMX2
+#ifndef USE_INTRINSICS
__asm__ __volatile__ (
" pxor %%mm0,%%mm0 \n" //mm0: dest pixel
" movd (%0),%%mm6 \n"
" pshufw $0,%%mm6,%%mm6 \n"// mm6: new_pixel_width_rec
" pxor %%mm7,%%mm7 \n" //mm7: 00 00 00 ...
: : "r" (&new_pixel_width_rec) );
+#else
+ v0 = __builtin_ia32_pxor(v0, v0);
+ v6 = __builtin_ia32_vec_init_v2si(new_pixel_width_rec, 0);
+ v6 = __builtin_ia32_pshufw(v6, 0);
+ v7 = __builtin_ia32_pxor(v7, v7);
+#endif
#endif
SCALEDEBH("OSD_WIDTH: %d dest_width: %d new_pixel_width: %d\n",
OSD_WIDTH,dest_Width,new_pixel_width);
@@ -1402,12 +1417,19 @@
g_sum+=GET_G(c)*ScaleFactor;
r_sum+=GET_R(c)*ScaleFactor;
#else
+#ifndef USE_INTRINSICS
__asm__ __volatile__(
" movd (%0),%%mm1 \n"
" punpcklbw %%mm7, %%mm1 \n"
" psllw $"SHIFT_BITS",%%mm1 \n"
" paddw %%mm1,%%mm0 \n"
: : "r" (pixmap) );
+#else
+ v1 = __builtin_ia32_vec_init_v2si(*pixmap, 0);
+ v1 = __builtin_ia32_punpcklbw(v1, v7);
+ v1 = __builtin_ia32_psllw(v1, SHIFT_BITS_NUM);
+ v0 = __builtin_ia32_paddw(v0, v1);
+#endif
#endif
pos -=ScaleFactor;
@@ -1423,6 +1445,7 @@
g_sum+=GET_G(c)*pos;
r_sum+=GET_R(c)*pos;
#else
+#ifndef USE_INTRINSICS
__asm__ __volatile__(
" movd (%0),%%mm1 \n"
" movd %1,%%mm2 \n"
@@ -1431,6 +1454,14 @@
" pmullw %%mm2,%%mm1 \n"
" paddw %%mm1,%%mm0 \n"
: : "r" (pixmap),"r" (pos) );
+#else
+ v1 = __builtin_ia32_vec_init_v2si(*pixmap, 0);
+ v2 = __builtin_ia32_vec_init_v2si(pos, 0);
+ v1 = __builtin_ia32_punpcklbw(v1, v7);
+ v2 = __builtin_ia32_pshufw(v2, 0);
+ v1 = __builtin_ia32_pmullw(v1, v2);
+ v0 = __builtin_ia32_paddw(v0, v1);
+#endif
#endif
SCALEDEBH("a_sum: %d new_pixel_width_rec: %d a pixel: %d",
@@ -1452,6 +1483,7 @@
dest[3]=a_sum;*/
a_sum=b_sum=g_sum=r_sum=0;
#else
+#ifndef USE_INTRINSICS
__asm__ __volatile__ (
" psrlw $"SHIFT_BITS",%%mm0 \n"
" pmullw %%mm6,%%mm0 \n"
@@ -1460,6 +1492,14 @@
" movd %%mm0,(%0) \n"
" pxor %%mm0,%%mm0 \n"
: : "r"(dest) );
+#else
+ v0 = __builtin_ia32_psrlw(v0, SHIFT_BITS_NUM);
+ v0 = __builtin_ia32_pmullw(v0, v6);
+ v0 = __builtin_ia32_psrlw(v0, SHIFT_BITS_NUM);
+ v0 = __builtin_ia32_packuswb(v0, v0);
+ *dest = __builtin_ia32_vec_ext_v2si(v0, 0);
+ v0 = __builtin_ia32_pxor(v0, v0);
+#endif
#endif
SCALEDEBH(", %d, %d, %d\n",r_sum,g_sum,b_sum);
dest++;
@@ -1478,6 +1518,7 @@
g_sum=GET_G(c)*apos;
r_sum=GET_R(c)*apos;
#else
+#ifndef USE_INTRINSICS
__asm__ __volatile__ (
" movd (%0),%%mm1 \n"
" movd %1,%%mm2 \n"
@@ -1486,6 +1527,14 @@
" pmullw %%mm2,%%mm1 \n"
" paddw %%mm1,%%mm0 \n"
: : "r" (pixmap),"r" (apos) );
+#else
+ v1 = __builtin_ia32_vec_init_v2si(*pixmap, 0);
+ v2 = __builtin_ia32_vec_init_v2si(apos, 0);
+ v1 = __builtin_ia32_punpcklbw(v1, v7);
+ v2 = __builtin_ia32_pshufw(v2, 0x0);
+ v1 = __builtin_ia32_pmullw(v1, v2);
+ v0 = __builtin_ia32_paddw(v0, v1);
+#endif
#endif
pixmap++;
pos += new_pixel_width;
pgph8FOdn4UYj.pgp
Description: PGP signature
_______________________________________________ Softdevice-devel mailing list [email protected] https://lists.berlios.de/mailman/listinfo/softdevice-devel
