Provides a SPLATD_LOW macro that can either splat in-place or load/splat from
a 32-bit value in memory.
---
libavcodec/x86/dsputil_yasm.asm | 13 ++++---------
libavcodec/x86/fmtconvert.asm | 28 +++++++++++++---------------
libavutil/x86/x86util.asm | 25 +++++++++++++++++--------
3 files changed, 34 insertions(+), 32 deletions(-)
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 344a0fe..07d011d 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -1059,16 +1059,13 @@ emu_edge mmx
%macro VECTOR_CLIP_INT32 2
cglobal vector_clip_int32, 5,5,11, dst, src, min, max, len
+ SPLATD_LOW m4, minm
+ SPLATD_LOW m5, maxm
%if notcpuflag(sse4) && cpuflag(sse2) && notcpuflag(atom)
- cvtsi2ss m4, minm
- cvtsi2ss m5, maxm
+ cvtdq2ps m4, m4
+ cvtdq2ps m5, m5
%define CLIPD CLIPD_FLT
-%else
- movd m4, minm
- movd m5, maxm
%endif
- SPLATD m4
- SPLATD m5
.loop:
%assign %%i 1
%rep %1
@@ -1113,10 +1110,8 @@ cglobal vector_clip_int32, 5,5,11, dst, src, min, max, len
%endmacro
INIT_MMX mmx
-%define SPLATD SPLATD_MMX
VECTOR_CLIP_INT32 1, 0
INIT_XMM sse2,atom
-%define SPLATD SPLATD_SSE2
VECTOR_CLIP_INT32 1, 0
INIT_XMM sse2
VECTOR_CLIP_INT32 2, 0
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 4916e7a..e0bde10 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -27,18 +27,20 @@ SECTION_TEXT
;---------------------------------------------------------------------------------
; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
;---------------------------------------------------------------------------------
-%macro INT32_TO_FLOAT_FMUL_SCALAR 2
+%macro INT32_TO_FLOAT_FMUL_SCALAR 0
%if UNIX64
-cglobal int32_to_float_fmul_scalar_%1, 3,3,%2, dst, src, len
+cglobal int32_to_float_fmul_scalar, 3,3,5, dst, src, len
%else
-cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
+cglobal int32_to_float_fmul_scalar, 4,4,5, dst, src, mul, len
%endif
-%if WIN64
+%if ARCH_X86_32
+ SPLATD_LOW m0, mulm
+%else
+ %if WIN64
SWAP 0, 2
-%elif ARCH_X86_32
- movss m0, mulm
+ %endif
+ SPLATD_LOW m0
%endif
- SPLATD m0
shl lenq, 2
add srcq, lenq
add dstq, lenq
@@ -64,14 +66,10 @@ cglobal int32_to_float_fmul_scalar_%1, 4,4,%2, dst, src, mul, len
REP_RET
%endmacro
-INIT_XMM
-%define SPLATD SPLATD_SSE
-%define movdqa movaps
-INT32_TO_FLOAT_FMUL_SCALAR sse, 5
-%undef movdqa
-%define SPLATD SPLATD_SSE2
-INT32_TO_FLOAT_FMUL_SCALAR sse2, 3
-%undef SPLATD
+INIT_XMM sse
+INT32_TO_FLOAT_FMUL_SCALAR
+INIT_XMM sse2
+INT32_TO_FLOAT_FMUL_SCALAR
;------------------------------------------------------------------------------
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index a623f82..2ac792f 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -567,16 +567,25 @@
%endif
%endmacro
-%macro SPLATD_MMX 1
+; splat the low dword of the register, or a 32-bit value loaded from m32, to all dwords
+; %1 = dst/src mmreg, %2 = src m32 (optional; omit it to splat %1 in place)
+%macro SPLATD_LOW 1-2
+%if %0 > 1 && notcpuflag(avx)
+ %if mmsize == 8 || cpuflag(sse2)
+ movd %1, %2 ; load the 32-bit source into the low dword first
+ %else
+ movss %1, %2 ; xmm without sse2: scalar float load instead of movd
+ %endif
+%endif
+%if mmsize == 8
punpckldq %1, %1
-%endmacro
-
-%macro SPLATD_SSE 1
- shufps %1, %1, 0
-%endmacro
-
-%macro SPLATD_SSE2 1
+%elif %0 > 1 && cpuflag(avx)
+ vbroadcastss %1, %2 ; loads and splats at once, hence no separate load above; NOTE(review): AVX1 only accepts a memory source here - confirm callers never pass a register
+%elif cpuflag(sse2)
pshufd %1, %1, 0
+%else ; sse without sse2
+ shufps %1, %1, 0
+%endif
%endmacro
%macro CLIPW 3 ;(dst, min, max)
--
1.7.1
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel