---
libavcodec/x86/dsputil_yasm.asm | 41 ++++++++++++++++++++------------------
1 files changed, 22 insertions(+), 19 deletions(-)
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index d42d24b..373e430 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -456,28 +456,31 @@ cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left
ADD_HFYU_LEFT_LOOP 0
-; float scalarproduct_float_sse(const float *v1, const float *v2, int len)
+;------------------------------------------------------------------------------
+; float ff_scalarproduct_float(const float *v1, const float *v2, int len)
+;------------------------------------------------------------------------------
+
INIT_XMM sse
cglobal scalarproduct_float, 3,3,2, v1, v2, offset
- neg offsetq
- shl offsetq, 2
- sub v1q, offsetq
- sub v2q, offsetq
- xorps m0, m0
- .loop:
- movaps m1, [v1q+offsetq]
- mulps m1, [v2q+offsetq]
- addps m0, m1
- add offsetq, 16
- js .loop
- movhlps m1, m0
- addps m0, m1
- movss m1, m0
- shufps m0, m0, 1
- addss m0, m1
+ shl offsetd, 2 ; len -> byte count; the 32-bit write zero-extends, so undefined upper bits of the int arg are discarded
+ add v1q, offsetq ; v1 = one past the last element
+ add v2q, offsetq ; v2 = one past the last element
+ neg offsetq ; offset = -len*4, counts up towards 0
+ xorps m0, m0 ; m0 = four partial sums, all zero
+.loop: ; body always runs at least once, 4 floats per pass
+ movaps m1, [v1q+offsetq] ; aligned load: v1 must be 16-byte aligned
+ mulps m1, [v2q+offsetq] ; aligned memory operand: same for v2
+ addps m0, m1 ; accumulate the 4 products
+ add offsetq, 16 ; next 4 floats
+ js .loop ; loop while offset is still negative
+ movhlps m1, m0 ; m1[0..1] = m0[2..3]
+ addps m0, m1 ; m0[0] = s0+s2, m0[1] = s1+s3
+ movss m1, m0 ; m1[0] = s0+s2
+ shufps m0, m0, 1 ; m0[0] = s1+s3
+ addss m0, m1 ; m0[0] = total sum
%ifndef ARCH_X86_64
- movd r0m, m0
- fld dword r0m
+ movss r0m, m0 ; SSE1 store (movd from xmm needs SSE2); arg slot reused as scratch
+ fld dword r0m ; load the result onto the x87 stack for the 32-bit return
%endif
RET
--
1.7.1
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel