---------- Forwarded message ---------- From: Steve Borho <[email protected]> Date: 2013/11/8 Subject: Re: [x265] [PATCH] asm code for blockfil_s, 4x4 To: Development for x265 <[email protected]>
On Thu, Nov 7, 2013 at 6:56 AM, <[email protected]> wrote: > # HG changeset patch > # User Praveen Tiwari > # Date 1383828996 -19800 > # Node ID f2af7af43dfcb08135a08e755f654314a89efae7 > # Parent d71f86b1c58b4fc9f8a3ffeaaef45c60f8bcc468 > asm code for blockfil_s, 4x4 > > >>blockfill has two l Actually, I named all pointers with blockfill (two l's) and the function with blockfil (one l), perhaps matching the naming convention from old code, but it seems odd; I will take care of it. diff -r d71f86b1c58b -r f2af7af43dfc source/common/x86/asm-primitives.cpp > --- a/source/common/x86/asm-primitives.cpp Thu Nov 07 18:16:22 2013 > +0530 > +++ b/source/common/x86/asm-primitives.cpp Thu Nov 07 18:26:36 2013 > +0530 > @@ -361,6 +361,8 @@ > p.luma_copy_sp[LUMA_64x32] = x265_blockcopy_sp_64x32_sse2; > p.luma_copy_sp[LUMA_64x48] = x265_blockcopy_sp_64x48_sse2; > p.luma_copy_sp[LUMA_64x64] = x265_blockcopy_sp_64x64_sse2; > + > + p.blockfill_s[BLOCK_4x4] = x265_blockfil_s_4x4_sse2; > #if X86_64 > p.satd[LUMA_8x32] = x265_pixel_satd_8x32_sse2; > p.satd[LUMA_16x4] = x265_pixel_satd_16x4_sse2; > diff -r d71f86b1c58b -r f2af7af43dfc source/common/x86/blockcopy8.asm > --- a/source/common/x86/blockcopy8.asm Thu Nov 07 18:16:22 2013 +0530 > +++ b/source/common/x86/blockcopy8.asm Thu Nov 07 18:26:36 2013 +0530 > @@ -1646,3 +1646,22 @@ > BLOCKCOPY_SP_W64_H1 64, 32 > BLOCKCOPY_SP_W64_H1 64, 48 > BLOCKCOPY_SP_W64_H1 64, 64 > + > > +;----------------------------------------------------------------------------- > +; void blockfil_s_4x4(int16_t *dest, intptr_t destride, int16_t val) > > +;----------------------------------------------------------------------------- > +INIT_XMM sse2 > +cglobal blockfil_s_4x4, 3, 3, 1, dest, destStride, val > + > +add r1, r1 > + > +movd m0, r2d > +pshuflw m0, m0, 0 > + > +movh [r0], m0 > +movh [r0 + r1], m0 > +movh [r0 + 2 * r1], m0 > +lea r0, [r0 + 2 * r1] > +movh [r0 + r1], m0 > + > +RET > diff -r d71f86b1c58b -r f2af7af43dfc source/common/x86/pixel.h > --- 
a/source/common/x86/pixel.h Thu Nov 07 18:16:22 2013 +0530 > +++ b/source/common/x86/pixel.h Thu Nov 07 18:26:36 2013 +0530 > @@ -266,6 +266,8 @@ > DECL_ADS(2, avx2) > DECL_ADS(1, avx2) > > +void x265_blockfil_s_4x4_sse2(int16_t *dst, intptr_t dstride, int16_t > val); > + > >>this belongs in blockcopy8.h Will be moved to blockcopy8.h. > #undef DECL_PIXELS > #undef DECL_SUF > #undef DECL_HEVC_SSD > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel > -- Steve Borho _______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
