Provides a SPLATD_LOW macro that can either splat in-place or load/splat from
a 32-bit value in memory.
---
 libavcodec/x86/dsputil_yasm.asm |   13 ++++---------
 libavcodec/x86/fmtconvert.asm   |   28 +++++++++++++---------------
 libavutil/x86/x86util.asm       |   25 +++++++++++++++++--------
 3 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 344a0fe..07d011d 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -1059,16 +1059,13 @@ emu_edge mmx
 
 %macro VECTOR_CLIP_INT32 2
 cglobal vector_clip_int32, 5,5,11, dst, src, min, max, len
+    SPLATD_LOW m4, minm
+    SPLATD_LOW m5, maxm
 %if notcpuflag(sse4) && cpuflag(sse2) && notcpuflag(atom)
-    cvtsi2ss  m4, minm
-    cvtsi2ss  m5, maxm
+    cvtdq2ps   m4, m4
+    cvtdq2ps   m5, m5
     %define CLIPD CLIPD_FLT
-%else
-    movd      m4, minm
-    movd      m5, maxm
 %endif
-    SPLATD    m4
-    SPLATD    m5
 .loop:
 %assign %%i 1
 %rep %1
@@ -1113,10 +1110,8 @@ cglobal vector_clip_int32, 5,5,11, dst, src, min, max, len
 %endmacro
 
 INIT_MMX mmx
-%define SPLATD SPLATD_MMX
 VECTOR_CLIP_INT32 1, 0
 INIT_XMM sse2,atom
-%define SPLATD SPLATD_SSE2
 VECTOR_CLIP_INT32 1, 0
 INIT_XMM sse2
 VECTOR_CLIP_INT32 2, 0
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 4916e7a..e0bde10 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -27,18 +27,20 @@ SECTION_TEXT
 ;---------------------------------------------------------------------------------
 ; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
 ;---------------------------------------------------------------------------------
-%macro INT32_TO_FLOAT_FMUL_SCALAR 2
+%macro INT32_TO_FLOAT_FMUL_SCALAR 0
 %if UNIX64
-cglobal int32_to_float_fmul_scalar_%1, 3,3,%2, dst, src, len
+cglobal int32_to_float_fmul_scalar, 3,3,5, dst, src, len
 %else
-cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
+cglobal int32_to_float_fmul_scalar, 4,4,5, dst, src, mul, len
 %endif
-%if WIN64
+%if ARCH_X86_32
+    SPLATD_LOW m0, mulm
+%else
+    %if WIN64
     SWAP 0, 2
-%elif ARCH_X86_32
-    movss   m0, mulm
+    %endif
+    SPLATD_LOW m0
 %endif
-    SPLATD  m0
     shl     lenq, 2
     add     srcq, lenq
     add     dstq, lenq
@@ -64,14 +66,10 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
     REP_RET
 %endmacro
 
-INIT_XMM
-%define SPLATD SPLATD_SSE
-%define movdqa movaps
-INT32_TO_FLOAT_FMUL_SCALAR sse, 5
-%undef movdqa
-%define SPLATD SPLATD_SSE2
-INT32_TO_FLOAT_FMUL_SCALAR sse2, 3
-%undef SPLATD
+INIT_XMM sse
+INT32_TO_FLOAT_FMUL_SCALAR
+INIT_XMM sse2
+INT32_TO_FLOAT_FMUL_SCALAR
 
 
 ;------------------------------------------------------------------------------
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index a623f82..2ac792f 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -567,16 +567,25 @@
 %endif
 %endmacro
 
-%macro SPLATD_MMX 1
+; splat low dword or m32 to all dwords
+; %1 = dst/src mmreg, %2 = src m32 (optional)
+%macro SPLATD_LOW 1-2
+%if %0 > 1 && notcpuflag(avx)
+    %if mmsize == 8 || cpuflag(sse2)
+    movd   %1, %2
+    %else
+    movss  %1, %2
+    %endif
+%endif
+%if mmsize == 8
     punpckldq  %1, %1
-%endmacro
-
-%macro SPLATD_SSE 1
-    shufps  %1, %1, 0
-%endmacro
-
-%macro SPLATD_SSE2 1
+%elif %0 > 1 && cpuflag(avx)
+    vbroadcastss %1, %2
+%elif cpuflag(sse2)
     pshufd  %1, %1, 0
+%else ; sse
+    shufps  %1, %1, 0
+%endif
 %endmacro
 
 %macro CLIPW 3 ;(dst, min, max)
-- 
1.7.1

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to