At 2013-11-20 19:45:24,[email protected] wrote: ># HG changeset patch ># User Praveen Tiwari ># Date 1384947915 -19800 ># Node ID c1e556f54d61422d153ff67f4830dc62dd1111d9 ># Parent a7fb47a7eddf18634449a5ac898f7c2d029048e9 >asm code for pixeladd_ps_4x4 and testbench integration > >diff -r a7fb47a7eddf -r c1e556f54d61 source/common/CMakeLists.txt >--- a/source/common/CMakeLists.txt Wed Nov 20 12:57:57 2013 +0530 >+++ b/source/common/CMakeLists.txt Wed Nov 20 17:15:15 2013 +0530 >@@ -113,7 +113,7 @@ > > if(ENABLE_PRIMITIVES_ASM) > set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h) >- set(A_SRCS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm mc-a2.asm >ipfilter8.asm pixel-util.asm blockcopy8.asm) >+ set(A_SRCS pixel-a.asm const-a.asm cpu-a.asm sad-a.asm mc-a.asm mc-a2.asm >ipfilter8.asm pixel-util.asm blockcopy8.asm pixeladd8.asm) > if (NOT X64) > set(A_SRCS ${A_SRCS} pixel-32.asm) > endif() >diff -r a7fb47a7eddf -r c1e556f54d61 source/common/x86/asm-primitives.cpp >--- a/source/common/x86/asm-primitives.cpp Wed Nov 20 12:57:57 2013 +0530 >+++ b/source/common/x86/asm-primitives.cpp Wed Nov 20 17:15:15 2013 +0530 >@@ -633,6 +633,13 @@ > p.calcrecon[BLOCK_32x32] = x265_calcRecons32_sse4; > p.calcresidual[BLOCK_16x16] = x265_getResidual16_sse4; > p.calcresidual[BLOCK_32x32] = x265_getResidual32_sse4; >+ >+ // This function pointer initialization is temporary will be removed >+ // later with macro definitions. It is used to avoid linker errors >+ // until all partitions are coded and commit smaller patches, easier >to >+ // review. 
>+ >+ p.chroma_add_ps[X265_CSP_I420][CHROMA_4x4] = >x265_pixel_add_ps_4x4_sse4; > } > if (cpuMask & X265_CPU_AVX) > { >diff -r a7fb47a7eddf -r c1e556f54d61 source/common/x86/pixel.h >--- a/source/common/x86/pixel.h Wed Nov 20 12:57:57 2013 +0530 >+++ b/source/common/x86/pixel.h Wed Nov 20 17:15:15 2013 +0530 >@@ -313,7 +313,8 @@ > SETUP_CHROMA_PIXELSUB_PS_FUNC(8, 32, cpu); > > #define SETUP_LUMA_PIXELSUB_PS_FUNC(W, H, cpu) \ >- void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t *dest, intptr_t >destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1); >+ void x265_pixel_sub_ps_ ## W ## x ## H ## cpu(int16_t *dest, intptr_t >destride, pixel *src0, pixel *src1, intptr_t srcstride0, intptr_t srcstride1);\ >+ void x265_pixel_add_ps_ ## W ## x ## H ## cpu(pixel *dest, int destride, >pixel *src0, int16_t *scr1, int srcStride0, int srcStride1); > > #define LUMA_PIXELSUB_DEF(cpu) \ > SETUP_LUMA_PIXELSUB_PS_FUNC(4, 4, cpu); \ >@@ -342,6 +343,8 @@ > SETUP_LUMA_PIXELSUB_PS_FUNC(64, 16, cpu); \ > SETUP_LUMA_PIXELSUB_PS_FUNC(16, 64, cpu); > >+// void x265_pixeladd_ps_4x4_sse4(pixel *dest, int destride, pixel *src0, >int16_t *scr1, int srcStride0, int srcStride1); >+ Please remove the unused, commented-out declaration above. > CHROMA_PIXELSUB_DEF(_sse4); > LUMA_PIXELSUB_DEF(_sse4); > >diff -r a7fb47a7eddf -r c1e556f54d61 source/common/x86/pixeladd8.asm >--- /dev/null Thu Jan 01 00:00:00 1970 +0000 >+++ b/source/common/x86/pixeladd8.asm Wed Nov 20 17:15:15 2013 +0530 >@@ -0,0 +1,79 @@ >+;***************************************************************************** >+;* Copyright (C) 2013 x265 project >+;* >+;* Authors: Praveen Kumar Tiwari <[email protected]> >+;* >+;* This program is free software; you can redistribute it and/or modify >+;* it under the terms of the GNU General Public License as published by >+;* the Free Software Foundation; either version 2 of the License, or >+;* (at your option) any later version. 
>+;* >+;* This program is distributed in the hope that it will be useful, >+;* but WITHOUT ANY WARRANTY; without even the implied warranty of >+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >+;* GNU General Public License for more details. >+;* >+;* You should have received a copy of the GNU General Public License >+;* along with this program; if not, write to the Free Software >+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. >+;* >+;* This program is also available under a commercial proprietary license. >+;* For more information, contact us at [email protected]. >+;*****************************************************************************/ >+ >+%include "x86inc.asm" >+%include "x86util.asm" >+ >+SECTION_RODATA 32 >+ >+SECTION .text >+ >+;----------------------------------------------------------------------------- >+; void pixel_add_ps_4x4(pixel *dest, int destride, pixel *src0, int16_t >*scr1, int srcStride0, int srcStride1) >+;----------------------------------------------------------------------------- >+INIT_XMM sse4 >+cglobal pixel_add_ps_4x4, 6, 6, 2, dest, destride, src0, scr1, srcStride0, >srcStride1 >+ >+add r5, r5 >+ >+movd m0, [r2] >+pmovzxbw m0, m0 >+movh m1, [r3] We can merge the movd and pmovzxbw into a single instruction (pmovzxbw m0, [r2]); according to the Intel documentation, this instruction does not require its memory operand to be aligned to a 16-byte boundary. >+ >+paddw m0, m1 >+packuswb m0, m0 >+ >+movd [r0], m0 >+ >+movd m0, [r2 + r4] >+pmovzxbw m0, m0 >+movh m1, [r3 + r5] >+ >+paddw m0, m1 >+packuswb m0, m0 >+ >+movd [r0 + r1], m0 >+ >+movd m0, [r2 + 2 * r4] >+pmovzxbw m0, m0 >+movh m1, [r3 + 2 * r5] >+ >+paddw m0, m1 >+packuswb m0, m0 >+ >+movd [r0 + 2 * r1], m0 >+ >+lea r0, [r0 + 2 * r1] >+lea r2, [r2 + 2 * r4] >+lea r3, [r3 + 2 * r5] >+ >+movd m0, [r2 + r4] >+pmovzxbw m0, m0 >+movh m1, [r3 + r5] >+ >+paddw m0, m1 >+packuswb m0, m0 >+ >+movd [r0 + r1], m0 >+ >+RET >_______________________________________________ >x265-devel mailing list >[email protected] 
>https://mailman.videolan.org/listinfo/x265-devel
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
