The MMX2-specific code paths (prefetchnta, movntq and the trailing sfence)
are not measurably faster than the plain MMX ones, so they needlessly
complicate the source without providing any apparent benefit.

Profiling YUV-to-RGB32 conversion:
mmx:
6041918 dezicycles in x, 8192 runs, 0 skips
6084080 dezicycles in x, 8192 runs, 0 skips
6051309 dezicycles in x, 8192 runs, 0 skips
6073144 dezicycles in x, 8192 runs, 0 skips
(6062613 avg)
mmx2:
6071750 dezicycles in x, 8192 runs, 0 skips
6050798 dezicycles in x, 8192 runs, 0 skips
6070844 dezicycles in x, 8192 runs, 0 skips
6057621 dezicycles in x, 8192 runs, 0 skips
(6062753 avg)
---
 libswscale/x86/swscale_template.c |   10 ----------
 1 files changed, 0 insertions(+), 10 deletions(-)

diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index e03fbd4..581f2da 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -24,17 +24,8 @@
 #undef MOVNTQ
 #undef PREFETCH
 
-#if COMPILE_TEMPLATE_MMX2
-#define PREFETCH "prefetchnta"
-#else
 #define PREFETCH  " # nop"
-#endif
-
-#if COMPILE_TEMPLATE_MMX2
-#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
-#else
 #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
-#endif
 #define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
 
 #define YSCALEYUV2YV12X(x, offset, dest, width) \
@@ -2493,7 +2484,6 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
     if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
 
-    if (COMPILE_TEMPLATE_MMX2)      __asm__ volatile("sfence":::"memory");
     __asm__ volatile("emms"  :::"memory");
 
     /* store changed local vars back in the context */
-- 
1.7.4.4

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to