2014-07-27 10:20 GMT+02:00 Christophe Gisquet <christophe.gisq...@gmail.com>: > This is already used for qpel/luma filters.
And now with the patch attached.
From 4c9ef00c805b5ea81d96955bdfbc57ab7b383934 Mon Sep 17 00:00:00 2001 From: Christophe Gisquet <christophe.gisq...@gmail.com> Date: Fri, 25 Jul 2014 15:08:49 +0200 Subject: [PATCH 10/13] x86: hevc_mc: load less data in epel filters Before: 5679 decicycles in epel_bi, 2059976 runs, 37176 skips 3468 decicycles in epel_uni, 1040886 runs, 7690 skips After: 5323 decicycles in epel_bi, 2059493 runs, 37659 skips 3262 decicycles in epel_uni, 1040871 runs, 7705 skips --- libavcodec/x86/hevc_mc.asm | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm index a4b7a03..f0ef6a2 100644 --- a/libavcodec/x86/hevc_mc.asm +++ b/libavcodec/x86/hevc_mc.asm @@ -176,15 +176,23 @@ QPEL_TABLE 12, 4, w, sse4 %else %define rfilterq %2 %endif - movdqu m0, [rfilterq ] ;load 128bit of x +%if (%1 == 8 && %4 <= 4) +%define %%load movd +%elif (%1 == 8 && %4 <= 8) || (%1 > 8 && %4 <= 4) +%define %%load movq +%else +%define %%load movdqu +%endif + + %%load m0, [rfilterq ] %ifnum %3 - movdqu m1, [rfilterq+ %3] ;load 128bit of x+stride - movdqu m2, [rfilterq+2*%3] ;load 128bit of x+2*stride - movdqu m3, [rfilterq+3*%3] ;load 128bit of x+3*stride + %%load m1, [rfilterq+ %3] + %%load m2, [rfilterq+2*%3] + %%load m3, [rfilterq+3*%3] %else - movdqu m1, [rfilterq+ %3q] ;load 128bit of x+stride - movdqu m2, [rfilterq+2*%3q] ;load 128bit of x+2*stride - movdqu m3, [rfilterq+r3srcq] ;load 128bit of x+2*stride + %%load m1, [rfilterq+ %3q] + %%load m2, [rfilterq+2*%3q] + %%load m3, [rfilterq+r3srcq] %endif %if %1 == 8 -- 1.9.2.msysgit.0
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel