---
libavcodec/x86/h264_intrapred.asm | 129 ++++++------------------------------
libavcodec/x86/vp8dsp.asm | 21 ------
libavutil/x86/x86util.asm | 41 +++++++++++-
3 files changed, 59 insertions(+), 132 deletions(-)
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 87d32c4..5984454 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -103,15 +103,8 @@ cglobal pred16x16_horizontal, 2,3
%else
punpcklbw m0, m0
punpcklbw m1, m1
-%if cpuflag(mmx2)
- pshufw m0, m0, 0xff
- pshufw m1, m1, 0xff
-%else
- punpckhwd m0, m0
- punpckhwd m1, m1
- punpckhdq m0, m0
- punpckhdq m1, m1
-%endif
+ SPLATW m0, m0, 3 ; broadcast word 3 of m0 (the doubled byte 3) to every word
+ SPLATW m1, m1, 3 ; same for the second row in m1
mova [r0+r1*0+8], m0
mova [r0+r1*1+8], m1
%endif
@@ -162,18 +155,8 @@ cglobal pred16x16_dc, 2,7
shr r2d, 5
%if cpuflag(ssse3)
pxor m1, m1
- movd m0, r2d
- pshufb m0, m1
-%elif cpuflag(sse2)
- movd m0, r2d
- punpcklbw m0, m0
- pshuflw m0, m0, 0
- punpcklqdq m0, m0
-%elif cpuflag(mmx2)
- movd m0, r2d
- punpcklbw m0, m0
- pshufw m0, m0, 0
%endif
+ SPLATB_REG m0, r2, m1 ; pass r2, not r2d: the macro appends the 'd' suffix itself (movd %1, %2d); m1 is the zero pshufb mask on ssse3
%if mmsize==8
mov r3d, 8
@@ -227,12 +210,7 @@ cglobal pred16x16_tm_vp8, 2,5
movzx r2d, byte [r0+r1-1]
sub r2d, r3d
movd mm4, r2d
-%if cpuflag(mmx2)
- pshufw mm4, mm4, 0
-%else
- punpcklwd mm4, mm4
- punpckldq mm4, mm4
-%endif
+ SPLATW mm4, mm4, 0 ; broadcast word 0 (the difference just loaded from r2d) across mm4
movq mm5, mm4
movq mm6, mm4
movq mm7, mm4
@@ -332,19 +310,15 @@ cglobal pred16x16_plane_%1, 2,9,7
movhlps m1, m0
%endif
paddw m0, m1
-%if cpuflag(sse2)
- pshuflw m1, m0, 0xE
-%elif cpuflag(mmx2)
- pshufw m1, m0, 0xE
+%if cpuflag(mmx2) ; one branch now serves both former cases: PSHUFLW selects pshufw or pshuflw from mmsize
+ PSHUFLW m1, m0, 0xE ; words 2,3 of m0 -> words 0,1 of m1
%elif cpuflag(mmx)
mova m1, m0
psrlq m1, 32
%endif
paddw m0, m1
-%if cpuflag(sse2)
- pshuflw m1, m0, 0x1
-%elif cpuflag(mmx2)
- pshufw m1, m0, 0x1
+%if cpuflag(mmx2) ; same merged branch as above
+ PSHUFLW m1, m0, 0x1 ; word 1 of m0 -> word 0 of m1
%elif cpuflag(mmx)
mova m1, m0
psrlq m1, 16
@@ -483,25 +457,9 @@ cglobal pred16x16_plane_%1, 2,9,7
movd m1, r5d
movd m3, r3d
-%if cpuflag(sse2)
- pshuflw m0, m0, 0x0
- pshuflw m1, m1, 0x0
- pshuflw m3, m3, 0x0
- punpcklqdq m0, m0 ; splat H (words)
- punpcklqdq m1, m1 ; splat V (words)
- punpcklqdq m3, m3 ; splat a (words)
-%elif cpuflag(mmx2)
- pshufw m0, m0, 0x0
- pshufw m1, m1, 0x0
- pshufw m3, m3, 0x0
-%elif cpuflag(mmx)
- punpcklwd m0, m0
- punpcklwd m1, m1
- punpcklwd m3, m3
- punpckldq m0, m0
- punpckldq m1, m1
- punpckldq m3, m3
-%endif
+ SPLATW m0, m0, 0 ; splat H (words)
+ SPLATW m1, m1, 0 ; splat V (words)
+ SPLATW m3, m3, 0 ; splat a (words)
%ifidn %1, svq3
SWAP 0, 1
%endif
@@ -626,10 +584,8 @@ cglobal pred8x8_plane, 2,9,7
paddw m0, m1
%if notcpuflag(ssse3)
-%if cpuflag(sse2) ; mmsize == 16
- pshuflw m1, m0, 0xE
-%elif cpuflag(mmx2)
- pshufw m1, m0, 0xE
+%if cpuflag(mmx2) ; PSHUFLW selects pshufw (mmsize == 8) or pshuflw (otherwise)
+ PSHUFLW m1, m0, 0xE ; words 2,3 of m0 -> words 0,1 of m1
%elif cpuflag(mmx)
mova m1, m0
psrlq m1, 32
@@ -637,10 +593,8 @@ cglobal pred8x8_plane, 2,9,7
paddw m0, m1
%endif ; !ssse3
-%if cpuflag(sse2)
- pshuflw m1, m0, 0x1
-%elif cpuflag(mmx2)
- pshufw m1, m0, 0x1
+%if cpuflag(mmx2) ; PSHUFLW selects pshufw (mmsize == 8) or pshuflw (otherwise)
+ PSHUFLW m1, m0, 0x1 ; word 1 of m0 -> word 0 of m1
%elif cpuflag(mmx)
mova m1, m0
psrlq m1, 16
@@ -711,25 +665,9 @@ cglobal pred8x8_plane, 2,9,7
movd m1, r5d
movd m3, r3d
-%if cpuflag(sse2)
- pshuflw m0, m0, 0x0
- pshuflw m1, m1, 0x0
- pshuflw m3, m3, 0x0
- punpcklqdq m0, m0 ; splat H (words)
- punpcklqdq m1, m1 ; splat V (words)
- punpcklqdq m3, m3 ; splat a (words)
-%elif cpuflag(mmx2)
- pshufw m0, m0, 0x0
- pshufw m1, m1, 0x0
- pshufw m3, m3, 0x0
-%elif cpuflag(mmx)
- punpcklwd m0, m0
- punpcklwd m1, m1
- punpcklwd m3, m3
- punpckldq m0, m0
- punpckldq m1, m1
- punpckldq m3, m3
-%endif
+ SPLATW m0, m0, 0 ; splat H (words)
+ SPLATW m1, m1, 0 ; splat V (words)
+ SPLATW m3, m3, 0 ; splat a (words)
%if mmsize == 8
mova m2, m0
%endif
@@ -815,24 +753,8 @@ cglobal pred8x8_horizontal, 2,3
mova m2, [pb_3]
%endif
.loop:
- movd m0, [r0+r1*0-4]
- movd m1, [r0+r1*1-4]
-%if cpuflag(ssse3)
- pshufb m0, m2
- pshufb m1, m2
-%else
- punpcklbw m0, m0
- punpcklbw m1, m1
-%if cpuflag(mmx2)
- pshufw m0, m0, 0xff
- pshufw m1, m1, 0xff
-%else
- punpckhwd m0, m0
- punpckhwd m1, m1
- punpckhdq m0, m0
- punpckhdq m1, m1
-%endif
-%endif
+ SPLATB_LOAD m0, r0+r1*0-1, m2 ; splat the byte at [r0+r1*0-1]; the macro loads the dword at that address minus 3, i.e. the same [..-4] as before
+ SPLATB_LOAD m1, r0+r1*1-1, m2 ; same for the next row; m2 (pb_3) is only read on the ssse3 path
mova [r0+r1*0], m0
mova [r0+r1*1], m1
lea r0, [r0+r1*2]
@@ -1000,15 +922,8 @@ cglobal pred8x8_tm_vp8, 2,6
sub r3d, r4d
movd mm2, r2d
movd mm4, r3d
-%if cpuflag(mmx2)
- pshufw mm2, mm2, 0
- pshufw mm4, mm4, 0
-%else
- punpcklwd mm2, mm2
- punpcklwd mm4, mm4
- punpckldq mm2, mm2
- punpckldq mm4, mm4
-%endif
+ SPLATW mm2, mm2, 0 ; broadcast word 0 (value loaded from r2d) across mm2
+ SPLATW mm4, mm4, 0 ; broadcast word 0 (value loaded from r3d) across mm4
movq mm3, mm2
movq mm5, mm4
paddw mm2, mm0
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index 82f21fe..531b205 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -1465,27 +1465,6 @@ VP8_DC_WHT
%endif
%endmacro
-%macro SPLATB_REG 2-3
-%if cpuflag(ssse3)
- movd %1, %2d
- pshufb %1, %3
-%elif cpuflag(sse2)
- movd %1, %2d
- punpcklbw %1, %1
- pshuflw %1, %1, 0x0
- punpcklqdq %1, %1
-%elif cpuflag(mmx2)
- movd %1, %2d
- punpcklbw %1, %1
- pshufw %1, %1, 0x0
-%else
- movd %1, %2d
- punpcklbw %1, %1
- punpcklwd %1, %1
- punpckldq %1, %1
-%endif
-%endmacro
-
%macro SIMPLE_LOOPFILTER 2
cglobal vp8_%1_loop_filter_simple, 3, %2, 8, dst, stride, flim, cntr
%if mmsize == 8 ; mmx/mmxext
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index 066384b..941ec76 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -256,15 +256,26 @@
%define ABSB ABSB_MMX
%define ABSB2 ABSB2_MMX
-%macro SPLATB_MMX 3
+%macro SPLATB_LOAD 3 ; dst, address of the byte to splat (without brackets), pshufb mask (read only on the ssse3 path)
+%if cpuflag(ssse3)
+ movd %1, [%2-3] ; load the dword ending at %2, so the wanted byte is byte 3
+ pshufb %1, %3 ; mask comes from the caller; presumably selects byte 3 in every lane (e.g. pb_3)
+%else
movd %1, [%2-3] ;to avoid crossing a cacheline
punpcklbw %1, %1
SPLATW %1, %1, 3
+%endif
%endmacro
-%macro SPLATB_SSSE3 3
- movd %1, [%2-3]
+%macro SPLATB_REG 3 ; dst, GPR name WITHOUT size suffix (the 'd' is appended below), pshufb mask (read only on the ssse3 path)
+%if cpuflag(ssse3)
+ movd %1, %2d ; %2d: pass r2 / a named arg, not r2d
pshufb %1, %3
+%else
+ movd %1, %2d ; %2d: pass r2 / a named arg, not r2d
+ punpcklbw %1, %1 ; double byte 0 so that word 0 holds it twice
+ SPLATW %1, %1, 0 ; broadcast that word
+%endif
%endmacro
%macro PALIGNR_MMX 4-5 ; [dst,] src1, src2, imm, tmp
%macro PALIGNR_MMX 4-5 ; [dst,] src1, src2, imm, tmp
@@ -296,6 +307,14 @@
%endif
%endmacro
+%macro PSHUFLW 1+ ; operands are forwarded verbatim: dst, src, imm8
+ %if mmsize == 8 ; 8-byte registers: use the MMX-register form
+ pshufw %1
+ %else ; wider registers: shuffle only the low four words
+ pshuflw %1
+ %endif
+%endmacro
+
%macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from
%ifnum %5
pand m%3, m%5, m%4 ; src .. y6 .. y4
@@ -521,8 +540,22 @@
%if mmsize == 16
pshuflw %1, %2, (%3)*0x55
punpcklqdq %1, %1
-%else
+%elif cpuflag(mmx2) ; pshufw is only used when mmx2 is available
pshufw %1, %2, (%3)*0x55
+%else ; plain MMX: broadcast word %3 with two self-unpacks, one per index bit
+ %ifnidn %1, %2
+ mova %1, %2 ; the unpacks below operate in place on %1
+ %endif
+ %if %3 & 2 ; index bit 1: pick the pair of words 2,3 ...
+ punpckhwd %1, %1
+ %else ; ... or the pair of words 0,1; each word of the pair is now doubled
+ punpcklwd %1, %1
+ %endif
+ %if %3 & 1 ; index bit 0: the odd word of the pair now fills the high half ...
+ punpckhwd %1, %1
+ %else ; ... and the even word fills the low half
+ punpcklwd %1, %1
+ %endif
%endif
%endmacro
--
1.7.4.1
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel