Merged; sent the updated implementation. Regards, Praveen Tiwari
On Wed, Nov 20, 2013 at 6:08 PM, chen <[email protected]> wrote: > At 2013-11-20 19:45:24,[email protected] wrote: > ># HG changeset patch > ># User Praveen Tiwari > ># Date 1384947915 -19800 > ># Node ID c1e556f54d61422d153ff67f4830dc62dd1111d9 > ># Parent a7fb47a7eddf18634449a5ac898f7c2d029048e9 > >asm code for pixeladd_ps_4x4 and testbench integration > > > >diff -r a7fb47a7eddf -r c1e556f54d61 source/common/CMakeLists.txt > >--- a/source/common/CMakeLists.txt Wed Nov 20 12:57:57 2013 +0530 > >+++ b/source/common/CMakeLists.txt Wed Nov 20 17:15:15 2013 +0530 > >@@ -113,7 +113,7 @@ > > > > if(ENABLE_PRIMITIVES_ASM) > > set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h) > >- set(A_SRCS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm > >mc-a2.asm ipfilter8.asm pixel-util.asm blockcopy8.asm) > >+ set(A_SRCS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm > >mc-a2.asm ipfilter8.asm pixel-util.asm blockcopy8.asm pixeladd8.asm) > > if (NOT X64) > > set(A_SRCS ${A_SRCS} pixel-32.asm) > > endif() > >diff -r a7fb47a7eddf -r c1e556f54d61 source/common/x86/asm-primitives.cpp > >--- a/source/common/x86/asm-primitives.cpp Wed Nov 20 12:57:57 2013 +0530 > >+++ b/source/common/x86/asm-primitives.cpp Wed Nov 20 17:15:15 2013 +0530 > >@@ -633,6 +633,13 @@ > > p.calcrecon[BLOCK_32x32] = x265_calcRecons32_sse4; > > p.calcresidual[BLOCK_16x16] = x265_getResidual16_sse4; > > p.calcresidual[BLOCK_32x32] = x265_getResidual32_sse4; > >+ > >+ // This function pointer initialization is temporary will be removed > >+ // later with macro definitions. It is used to avoid linker errors > >+ // until all partitions are coded and commit smaller patches, > >easier to > >+ // review. 
> >+ > >+ p.chroma_add_ps[X265_CSP_I420][CHROMA_4x4] = > >x265_pixel_add_ps_4x4_sse4; > > } > > if (cpuMask & X265_CPU_AVX) > > { > >diff -r a7fb47a7eddf -r c1e556f54d61 source/common/x86/pixel.h > >--- a/source/common/x86/pixel.h Wed Nov 20 12:57:57 2013 +0530 > >+++ b/source/common/x86/pixel.h Wed Nov 20 17:15:15 2013 +0530 > >@@ -313,7 +313,8 @@ > > SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 32, cpu); > > > > #define SETUP_LUMA_PIXELSUB_PS_FUNC(W, H, cpu) \ > >- void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t *dest, intptr_t > >destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t > >srcstride1); > >+ void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t *dest, intptr_t > >destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t > >srcstride1);\ > >+ void x265_pixel_add_ps_ ## W ## x ## H ## cpu(pixel *dest, int > >destride, pixel *src0, int16_t *scr1, int srcStride0, int srcStride1); > > > > #define LUMA_PIXELSUB_DEF(cpu) \ > > SETUP_LUMA_PIXELSUB_PS_FUNC(4, 4, cpu); \ > >@@ -342,6 +343,8 @@ > > SETUP_LUMA_PIXELSUB_PS_FUNC(64, 16, cpu); \ > > SETUP_LUMA_PIXELSUB_PS_FUNC(16, 64, cpu); > > > >+// void x265_pixeladd_ps_4x4_sse4(pixel *dest, int destride, pixel > >*src0, int16_t *scr1, int srcStride0, int srcStride1); > >+ > remove unused line > > > > > CHROMA_PIXELSUB_DEF(_sse4); > > LUMA_PIXELSUB_DEF(_sse4); > > > >diff -r a7fb47a7eddf -r c1e556f54d61 source/common/x86/pixeladd8.asm > >--- /dev/null Thu Jan 01 00:00:00 1970 +0000 > >+++ b/source/common/x86/pixeladd8.asm Wed Nov 20 17:15:15 2013 +0530 > >@@ -0,0 +1,79 @@ > >+;***************************************************************************** > >+;* Copyright (C) 2013 x265 project > >+;* > >+;* Authors: Praveen Kumar Tiwari <[email protected]> > >+;* > >+;* This program is free software; you can redistribute it and/or modify > >+;* it under the terms of the GNU General Public License as published by > >+;* the Free Software Foundation; either version 2 of the License, or > >+;* (at your 
option) any later version. > >+;* > >+;* This program is distributed in the hope that it will be useful, > >+;* but WITHOUT ANY WARRANTY; without even the implied warranty of > >+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > >+;* GNU General Public License for more details. > >+;* > >+;* You should have received a copy of the GNU General Public License > >+;* along with this program; if not, write to the Free Software > >+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, > >USA. > >+;* > >+;* This program is also available under a commercial proprietary license. > >+;* For more information, contact us at [email protected]. > >+;*****************************************************************************/ > >+ > >+%include "x86inc.asm" > >+%include "x86util.asm" > >+ > >+SECTION_RODATA 32 > >+ > >+SECTION .text > >+ > >+;----------------------------------------------------------------------------- > >+; void pixel_add_ps_4x4(pixel *dest, int destride, pixel *src0, int16_t > >*scr1, int srcStride0, int srcStride1) > >+;----------------------------------------------------------------------------- > >+INIT_XMM sse4 > >+cglobal pixel_add_ps_4x4, 6, 6, 2, dest, destride, src0, scr1, srcStride0, > >srcStride1 > >+ > >+add r5, r5 > >+ > >+movd m0, [r2] > >+pmovzxbw m0, m0 > >+movh m1, [ > r3] > We can merge movd and pmovzxbw: according to the Intel documentation, this instruction does not > require its memory operand to be aligned to a 16-byte boundary. > > >+ > >+paddw m0, m1 > >+packuswb m0, m0 > >+ > >+movd [r0], m0 > >+ > >+movd m0, [r2 + r4] > >+pmovzxbw m0, m0 > >+movh m1, [r3 + r5] > >+ > >+paddw m0, m1 > >+packuswb m0, m0 > >+ > >+movd [r0 + r1], m0 > >+ > >+movd m0, [r2 + 2 * r4] > >+pmovzxbw m0, m0 > >+movh m1, [r3 + 2 * r5] > >+ > >+paddw m0, m1 > >+packuswb m0, m0 > >+ > >+movd [r0 + 2 * r1], m0 > >+ > >+lea r0, [r0 + 2 * r1] > >+lea r2, [r2 + 2 * r4] > >+lea r3, [r3 + 2 * r5] > >+ > >+movd m0, [r2 + r4] > >+pmovzxbw m0, m0 > >+movh m1, [r3 + r5] > >+ > >+paddw m0, m1 
> >+packuswb m0, m0 > >+ > >+movd [r0 + r1], > m0 > >+ > >+RET > >_______________________________________________ > >x265-devel mailing list > >[email protected] > >https://mailman.videolan.org/listinfo/x265-devel > > > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel > >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
