Eliminate a few instructions and simplify by using SBUTTERFLY. No effect on
speed, though: the mmx version should be memory-bound.
--Loren Merritt
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 73b3763..f5115b3 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -20,6 +20,7 @@
;******************************************************************************
%include "x86inc.asm"
+%include "x86util.asm"
section .text align=16
@@ -152,24 +153,15 @@ cglobal float_interleave6_%1, 2,7,7, dst, src, src1, src2, src3, src4, src5
movq m3, [srcq+src3q]
movq m4, [srcq+src4q]
movq m5, [srcq+src5q]
-
- movq m6, m0
- punpckldq m6, m1
- movq m7, m2
- punpckldq m7, m3
- movq [dstq ], m6
- movq [dstq+8], m7
-
- movq m6, m4
- punpckldq m6, m5
- punpckhdq m0, m1
- movq [dstq+16], m6
- movq [dstq+24], m0
-
- punpckhdq m2, m3
- punpckhdq m4, m5
- movq [dstq+32], m2
- movq [dstq+40], m4
+ SBUTTERFLY dq, 0, 1, 6
+ SBUTTERFLY dq, 2, 3, 6
+ SBUTTERFLY dq, 4, 5, 6
+ movq [dstq ], m0
+ movq [dstq+ 8], m2
+ movq [dstq+16], m4
+ movq [dstq+24], m1
+ movq [dstq+32], m3
+ movq [dstq+40], m5
%endif
add srcq, mmsize
add dstq, mmsize*6
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel