ffmpeg | branch: master | James Almer <jamr...@gmail.com> | Fri Jun 2 19:17:28 2017 -0300| [be3809a521fecfd3a61db99d660f243bd32b30bb] | committer: James Almer
x86/aacpsdsp: optimize ff_ps_stereo_interpolate_sse3

Move the unpacking outside of the loop. 5% to 10% faster.

Suggested-by: ubitux
Signed-off-by: James Almer <jamr...@gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=be3809a521fecfd3a61db99d660f243bd32b30bb
---

 libavcodec/x86/aacpsdsp.asm | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/libavcodec/x86/aacpsdsp.asm b/libavcodec/x86/aacpsdsp.asm
index bb8a7f5df0..4548bb4257 100644
--- a/libavcodec/x86/aacpsdsp.asm
+++ b/libavcodec/x86/aacpsdsp.asm
@@ -93,6 +93,10 @@ cglobal ps_stereo_interpolate, 5, 5, 6, l, r, h, h_step, n
     movaps   m1, [h_stepq]
     cmp      nd, 0
     jle      .ret
+    unpcklps m4, m0, m0
+    unpckhps m0, m0
+    unpcklps m5, m1, m1
+    unpckhps m1, m1
     shl      nd, 3
     add      lq, nq
     add      rq, nq
@@ -100,15 +104,12 @@ cglobal ps_stereo_interpolate, 5, 5, 6, l, r, h, h_step, n
 
 align 16
 .loop:
+    addps    m4, m5
     addps    m0, m1
     movddup  m2, [lq+nq]
     movddup  m3, [rq+nq]
-    movaps   m4, m0
-    movaps   m5, m0
-    unpcklps m4, m4
-    unpckhps m5, m5
     mulps    m2, m4
-    mulps    m3, m5
+    mulps    m3, m0
     addps    m2, m3
     movsd    [lq+nq], m2
     movhps   [rq+nq], m2

_______________________________________________
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog