Provides a single SPLATD macro that can either splat the low dword in-place or
load a 32-bit value from memory and splat it to all dwords.
---
libavcodec/x86/dsputil_yasm.asm | 10 +++-----
libavcodec/x86/fmtconvert.asm | 28 ++++++++++++--------------
libavutil/x86/x86util.asm | 40 ++++++++++++++++++++++++--------------
libswscale/x86/output.asm | 2 +-
4 files changed, 43 insertions(+), 37 deletions(-)
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 746ba69..4e662d9 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -1062,14 +1062,14 @@ cglobal vector_clip_int32, 5,5,11, dst, src, min, max, len
%if notcpuflag(sse4) && cpuflag(sse2) && notcpuflag(atom)
cvtsi2ss m4, minm
cvtsi2ss m5, maxm
+ SPLATD m4
+ SPLATD m5
%assign is_float 1
%else
- movd m4, minm
- movd m5, maxm
+ SPLATD m4, minm
+ SPLATD m5, maxm
%assign is_float 0
%endif
- SPLATD m4
- SPLATD m5
.loop:
%assign %%i 1
%rep %1
@@ -1113,10 +1113,8 @@ cglobal vector_clip_int32, 5,5,11, dst, src, min, max, len
%endmacro
INIT_MMX mmx
-%define SPLATD SPLATD_MMX
VECTOR_CLIP_INT32 1, 0
INIT_XMM sse2,atom
-%define SPLATD SPLATD_SSE2
VECTOR_CLIP_INT32 1, 0
INIT_XMM sse2
VECTOR_CLIP_INT32 2, 0
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 4916e7a..2660361 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -27,18 +27,20 @@ SECTION_TEXT
;---------------------------------------------------------------------------------
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
;---------------------------------------------------------------------------------
-%macro INT32_TO_FLOAT_FMUL_SCALAR 2
+%macro INT32_TO_FLOAT_FMUL_SCALAR 0
%if UNIX64
-cglobal int32_to_float_fmul_scalar_%1, 3,3,%2, dst, src, len
+cglobal int32_to_float_fmul_scalar, 3,3,5, dst, src, len
%else
-cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
+cglobal int32_to_float_fmul_scalar, 4,4,5, dst, src, mul, len
%endif
-%if WIN64
+%if ARCH_X86_32
+ SPLATD m0, mulm
+%else
+ %if WIN64
SWAP 0, 2
-%elif ARCH_X86_32
- movss m0, mulm
+ %endif
+ SPLATD m0
%endif
- SPLATD m0
shl lenq, 2
add srcq, lenq
add dstq, lenq
@@ -64,14 +66,10 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
REP_RET
%endmacro
-INIT_XMM
-%define SPLATD SPLATD_SSE
-%define movdqa movaps
-INT32_TO_FLOAT_FMUL_SCALAR sse, 5
-%undef movdqa
-%define SPLATD SPLATD_SSE2
-INT32_TO_FLOAT_FMUL_SCALAR sse2, 3
-%undef SPLATD
+INIT_XMM sse
+INT32_TO_FLOAT_FMUL_SCALAR
+INIT_XMM sse2
+INT32_TO_FLOAT_FMUL_SCALAR
;------------------------------------------------------------------------------
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index a33858b..654aba4 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -559,24 +559,34 @@
%endif
%endmacro
-%macro SPLATD 2-3 0
-%if mmsize == 16
- pshufd %1, %2, (%3)*0x55
-%else
- pshufw %1, %2, (%3)*0x11 + ((%3)+1)*0x44
+; splat low dword or m32 to all dwords
+; %1 = dst/src mmreg, %2 = src m32 (optional)
+%macro SPLATD 1-2
+%if %0 > 1 && notcpuflag(avx)
+ %if mmsize == 8 || cpuflag(sse2)
+ movd %1, %2
+ %else
+ movss %1, %2
+ %endif
%endif
-%endmacro
-
-%macro SPLATD_MMX 1
+%if mmsize == 8
punpckldq %1, %1
-%endmacro
-
-%macro SPLATD_SSE 1
- shufps %1, %1, 0
-%endmacro
-
-%macro SPLATD_SSE2 1
+%elif mmsize == 16
+ %if %0 > 1 && cpuflag(avx)
+ vbroadcastss %1, %2
+ %elif cpuflag(sse2)
pshufd %1, %1, 0
+ %else ; sse
+ shufps %1, %1, 0
+ %endif
+%elif mmsize == 32
+ %if %0 > 1
+ vbroadcastss %1, %2
+ %else
+ vperm2f128 %1, %1, %1, 0
+ shufps %1, %1, 0
+ %endif
+%endif
%endmacro
%macro CLIPW 3 ;(dst, min, max)
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 9b0b012..b970792 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -188,7 +188,7 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
%else ; %1 == 10/9/8
punpcklwd m5, m3, m4
punpckhwd m3, m4
- SPLATD m0, m0
+ SPLATD m0
pmaddwd m5, m0
pmaddwd m3, m0
--
1.7.1
_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel