# HG changeset patch # User Min Chen <chenm...@163.com> # Date 1384418720 -28800 # Node ID 493981f517c44293fd1134707a910b53cc688015 # Parent 8e22129119d6d8049996ed5f487625e4801b0a50 asm: residual buffer is aligned to its size, so we can use aligned load instructions
diff -r 8e22129119d6 -r 493981f517c4 source/Lib/TLibEncoder/TEncSearch.cpp --- a/source/Lib/TLibEncoder/TEncSearch.cpp Thu Nov 14 16:45:03 2013 +0800 +++ b/source/Lib/TLibEncoder/TEncSearch.cpp Thu Nov 14 16:45:20 2013 +0800 @@ -501,6 +501,8 @@ primitives.blockfill_s[size](resiTmp, stride, 0); } + assert(((intptr_t)residual & (width - 1)) == 0); + assert(width <= 32); //===== reconstruction ===== primitives.calcrecon[size](pred, residual, recon, reconQt, reconIPred, stride, MAX_CU_SIZE, reconIPredStride); diff -r 8e22129119d6 -r 493981f517c4 source/common/x86/pixel-util.asm --- a/source/common/x86/pixel-util.asm Thu Nov 14 16:45:03 2013 +0800 +++ b/source/common/x86/pixel-util.asm Thu Nov 14 16:45:20 2013 +0800 @@ -239,10 +239,10 @@ cglobal calcRecons16 %if ARCH_X86_64 == 1 DECLARE_REG_TMP 0,1,2,3,4,5,6,7,8 - PROLOGUE 6,9,5 + PROLOGUE 6,9,3 %else DECLARE_REG_TMP 0,1,2,3,4,5 - PROLOGUE 6,7,5 + PROLOGUE 6,7,3 %define t6 r6m %define t6d r6d %define t7 r7m @@ -265,10 +265,8 @@ movu m2, [t0] pmovzxbw m1, m2 punpckhbw m2, m0 - movu m3, [t1] - movu m4, [t1 + 16] - paddw m1, m3 - paddw m2, m4 + paddw m1, [t1] + paddw m2, [t1 + 16] packuswb m1, m2 ; store recon[] and recipred[] @@ -296,10 +294,10 @@ cglobal calcRecons32 %if ARCH_X86_64 == 1 DECLARE_REG_TMP 0,1,2,3,4,5,6,7,8 - PROLOGUE 6,9,7 + PROLOGUE 6,9,5 %else DECLARE_REG_TMP 0,1,2,3,4,5 - PROLOGUE 6,7,7 + PROLOGUE 6,7,5 %define t6 r6m %define t6d r6d %define t7 r7m @@ -326,16 +324,12 @@ pmovzxbw m3, m4 punpckhbw m4, m0 - movu m5, [t1 + 0 * 16] - movu m6, [t1 + 1 * 16] - paddw m1, m5 - paddw m2, m6 + paddw m1, [t1 + 0 * 16] + paddw m2, [t1 + 1 * 16] packuswb m1, m2 - movu m5, [t1 + 2 * 16] - movu m6, [t1 + 3 * 16] - paddw m3, m5 - paddw m4, m6 + paddw m3, [t1 + 2 * 16] + paddw m4, [t1 + 3 * 16] packuswb m3, m4 ; store recon[] and recipred[] @@ -369,10 +363,10 @@ cglobal calcRecons64 %if ARCH_X86_64 == 1 DECLARE_REG_TMP 0,1,2,3,4,5,6,7,8 - PROLOGUE 6,9,7 + PROLOGUE 6,9,5 %else DECLARE_REG_TMP 0,1,2,3,4,5 - PROLOGUE 6,7,7 
+ PROLOGUE 6,7,5 %define t6 r6m %define t6d r6d %define t7 r7m @@ -400,16 +394,12 @@ pmovzxbw m3, m4 punpckhbw m4, m0 - movu m5, [t1 + 0 * 16] - movu m6, [t1 + 1 * 16] - paddw m1, m5 - paddw m2, m6 + paddw m1, [t1 + 0 * 16] + paddw m2, [t1 + 1 * 16] packuswb m1, m2 - movu m5, [t1 + 2 * 16] - movu m6, [t1 + 3 * 16] - paddw m3, m5 - paddw m4, m6 + paddw m3, [t1 + 2 * 16] + paddw m4, [t1 + 3 * 16] packuswb m3, m4 ; store recon[] and recipred[] @@ -436,16 +426,12 @@ pmovzxbw m3, m4 punpckhbw m4, m0 - movu m5, [t1 + 4 * 16] - movu m6, [t1 + 5 * 16] - paddw m1, m5 - paddw m2, m6 + paddw m1, [t1 + 4 * 16] + paddw m2, [t1 + 5 * 16] packuswb m1, m2 - movu m5, [t1 + 6 * 16] - movu m6, [t1 + 7 * 16] - paddw m3, m5 - paddw m4, m6 + paddw m3, [t1 + 6 * 16] + paddw m4, [t1 + 7 * 16] packuswb m3, m4 ; store recon[] and recipred[] _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel