On Mon, Feb 17, 2014 at 6:44 AM, <naba...@multicorewareinc.com> wrote:
> # HG changeset patch > # User Nabajit Deka > # Date 1392641037 -19800 > # Mon Feb 17 18:13:57 2014 +0530 > # Node ID f5275ca8f2985bb0daf563738e6071b81967c2cd > # Parent ce96cdb390fe26aee6effa731e51303c1d9056b0 > asm : asm routine for chroma_p2s for 4:4:4 color space format > Queued. There needs to be a comment somewhere about how the chroma_p2s 444 primitive is different from the others. > > diff -r ce96cdb390fe -r f5275ca8f298 source/common/x86/asm-primitives.cpp > --- a/source/common/x86/asm-primitives.cpp Sun Feb 16 22:47:32 2014 > -0600 > +++ b/source/common/x86/asm-primitives.cpp Mon Feb 17 18:13:57 2014 > +0530 > @@ -1119,8 +1119,8 @@ > > p.luma_hvpp[LUMA_8x8] = x265_interp_8tap_hv_pp_8x8_ssse3; > p.luma_p2s = x265_luma_p2s_ssse3; > - p.chroma_p2s[X265_CSP_I444] = x265_chroma_p2s_ssse3; > p.chroma_p2s[X265_CSP_I420] = x265_chroma_p2s_ssse3; > + p.chroma_p2s[X265_CSP_I444] = x265_chroma_p2s_i444_ssse3; > > CHROMA_SP_FILTERS_420(_ssse3); > CHROMA_SP_FILTERS_444(_ssse3); > diff -r ce96cdb390fe -r f5275ca8f298 source/common/x86/ipfilter8.asm > --- a/source/common/x86/ipfilter8.asm Sun Feb 16 22:47:32 2014 -0600 > +++ b/source/common/x86/ipfilter8.asm Mon Feb 17 18:13:57 2014 +0530 > @@ -3680,6 +3680,64 @@ > > RET > > +INIT_XMM ssse3 > +cglobal chroma_p2s_i444, 3, 7, 4 > + > + ; load width and height > + mov r3d, r3m > + mov r4d, r4m > + > + ; load constant > + mova m2, [tab_c_128] > + mova m3, [tab_c_64_n64] > + > +.loopH: > + > + xor r5d, r5d > +.loopW: > + lea r6, [r0 + r5] > + > + movh m0, [r6] > + punpcklbw m0, m2 > + pmaddubsw m0, m3 > + > + movh m1, [r6 + r1] > + punpcklbw m1, m2 > + pmaddubsw m1, m3 > + > + add r5d, 8 > + cmp r5d, r3d > + lea r6, [r2 + r5 * 2] > + jg .width4 > + movu [r6 + FENC_STRIDE * 0 - 16], m0 > + movu [r6 + FENC_STRIDE * 2 - 16], m1 > + je .nextH > + jmp .loopW > + > +.width4: > + test r3d, 4 > + jz .width2 > + test r3d, 2 > + movh [r6 + FENC_STRIDE * 0 - 16], m0 > + movh [r6 + FENC_STRIDE * 2 - 16], m1 > + lea r6, [r6 + 8] > + pshufd m0, m0, 2 > + pshufd m1, m1, 2 > + jz .nextH > + > +.width2: > + movd [r6 + FENC_STRIDE * 0 - 16], m0 > + movd [r6 + FENC_STRIDE * 2 - 16], m1 > + > +.nextH: > + lea r0, [r0 + r1 * 2] > + add r2, FENC_STRIDE * 4 > + > + sub r4d, 2 > + jnz .loopH > + > + RET > + > %macro PROCESS_CHROMA_SP_W4_4R 0 > movq m0, [r0] > movq m1, [r0 + r1] > diff -r ce96cdb390fe -r f5275ca8f298 source/common/x86/ipfilter8.h > --- a/source/common/x86/ipfilter8.h Sun Feb 16 22:47:32 2014 -0600 > +++ b/source/common/x86/ipfilter8.h Mon Feb 17 18:13:57 2014 +0530 > @@ -214,6 +214,7 @@ > void x265_interp_8tap_hv_pp_8x8_ssse3(pixel * src, intptr_t srcStride, > pixel * dst, intptr_t dstStride, int idxX, int idxY); > void x265_luma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, > int width, int height); > void x265_chroma_p2s_ssse3(pixel *src, intptr_t srcStride, int16_t *dst, > int width, int height); > +void x265_chroma_p2s_i444_ssse3(pixel *src, intptr_t srcStride, int16_t > *dst, int width, int height); > void x265_interp_4tap_vert_sp_2x4_sse4(int16_t * src, intptr_t srcStride, > pixel * dst, intptr_t dstStride, int coeffIdx); > void x265_interp_4tap_vert_sp_2x8_sse4(int16_t * src, intptr_t srcStride, > pixel * dst, intptr_t dstStride, int coeffIdx); > void x265_interp_4tap_vert_sp_6x8_sse4(int16_t * src, intptr_t srcStride, > pixel * dst, intptr_t dstStride, int coeffIdx); > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel > -- Steve Borho
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel