>+;----------------------------------------------------------------------------- >+; int pixel_ssd_WxH( uint16_t *, intptr_t, uint16_t *, intptr_t ) >+;----------------------------------------------------------------------------- >+ >+%macro HEVC_SSD_SS 0 >+cglobal pixel_ssd_ss_4x4, 4,7,6 >+ pxor m0, m0 can be removed >+ pmovsxwd m1, [r0] >+ pmovsxwd m2, [r2] >+ psubd m1, m2
>+ pmulld m1, m1 >+ paddd m0, m1 using the pmulld dest reg is better >+ lea r0, [r0 + r1*2] >+ lea r2, [r2 + r3*2] code indent >+ pmovsxwd m1, [r0] >+ pmovsxwd m2, [r2] >+ psubd m1, m2 >+ pmulld m1, m1 >+ paddd m0, m1 >+ lea r0, [r0 + r1*2] >+ lea r2, [r2 + r3*2] >+ pmovsxwd m1, [r0] >+ pmovsxwd m2, [r2] >+ psubd m1, m2 >+ pmulld m1, m1 >+ paddd m0, m1 >+ lea r0, [r0 + r1*2] >+ lea r2, [r2 + r3*2] >+ pmovsxwd m1, [r0] >+ pmovsxwd m2, [r2] >+ psubd m1, m2 >+ pmulld m1, m1 >+ paddd m0, m1 >+ phaddd m0, m0 >+ phaddd m0, m0 >+ movd eax, m0 >+ RET >+%endmacro >+ > %if HIGH_BIT_DEPTH == 0 > %macro SSD_LOAD_FULL 5 > mova m1, [t0+%1] >@@ -512,12 +551,17 @@ > %define SSD_CORE SSD_CORE_SSE2 > %define JOIN JOIN_SSE2 > HEVC_SSD >+HEVC_SSD_SS > INIT_XMM ssse3 > %define SSD_CORE SSD_CORE_SSSE3 > %define JOIN JOIN_SSSE3 > HEVC_SSD >+HEVC_SSD_SS >+INIT_XMM sse4 >+HEVC_SSD_SS > INIT_XMM avx > HEVC_SSD >+HEVC_SSD_SS > INIT_MMX ssse3 > SSD 4, 4 > SSD 4, 8 >diff -r d2173ec27a15 -r 98bcf33302ef source/common/x86/pixel.h >--- a/source/common/x86/pixel.h Thu Nov 21 20:16:39 2013 +0530 >+++ b/source/common/x86/pixel.h Fri Nov 22 18:57:18 2013 +0530 >@@ -59,6 +59,9 @@ > #define DECL_X1(name, suffix) \ > DECL_PIXELS(int, name, suffix, (pixel *, intptr_t, pixel *, intptr_t)) > >+#define DECL_X1_SS(name, suffix) \ >+ DECL_PIXELS(int, name, suffix, (int16_t *, intptr_t, int16_t *, intptr_t)) >+ > #define DECL_X4(name, suffix) \ > DECL_PIXELS(void, name ## _x3, suffix, (pixel *, pixel *, pixel *, pixel > *, intptr_t, int *)) \ > DECL_PIXELS(void, name ## _x4, suffix, (pixel *, pixel *, pixel *, pixel > *, pixel *, intptr_t, int *)) >@@ -86,6 +89,15 @@ > DECL_X1(ssd, avx) > DECL_X1(ssd, xop) > DECL_X1(ssd, avx2) >+DECL_X1_SS(ssd_ss, mmx) >+DECL_X1_SS(ssd_ss, mmx2) >+DECL_X1_SS(ssd_ss, sse2slow) >+DECL_X1_SS(ssd_ss, sse2) >+DECL_X1_SS(ssd_ss, ssse3) >+DECL_X1_SS(ssd_ss, sse4) >+DECL_X1_SS(ssd_ss, avx) >+DECL_X1_SS(ssd_ss, xop) >+DECL_X1_SS(ssd_ss, avx2) > DECL_X1(satd, mmx2) > DECL_X1(satd, sse2) > 
DECL_X1(satd, ssse3) >_______________________________________________ >x265-devel mailing list >x265-devel@videolan.org >https://mailman.videolan.org/listinfo/x265-devel
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
