# HG changeset patch
# User Murugan Vairavel <muru...@multicorewareinc.com>
# Date 1385632076 -19800
# Thu Nov 28 15:17:56 2013 +0530
# Node ID f0d2ef33a0bdb41b9b3d7edb9e0b7358b0783271
# Parent 7a0fe2f9074330bb3126e95194e7c4ed956c6e4d
asm: cleanups for pixel_sse_sp

diff -r 7a0fe2f90743 -r f0d2ef33a0bd source/common/x86/asm-primitives.cpp
--- a/source/common/x86/asm-primitives.cpp Thu Nov 28 14:58:39 2013 +0530
+++ b/source/common/x86/asm-primitives.cpp Thu Nov 28 15:17:56 2013 +0530
@@ -450,6 +450,36 @@
     SETUP_PIXEL_VAR_DEF(32, 32, cpu); \
     SETUP_PIXEL_VAR_DEF(64, 64, cpu);
 
+#define SETUP_PIXEL_SSE_SP_DEF(W, H, cpu) \
+    p.sse_sp[LUMA_ ## W ## x ## H] = x265_pixel_ssd_sp_ ## W ## x ## H ## cpu;
+
+#define LUMA_SSE_SP(cpu) \
+    SETUP_PIXEL_SSE_SP_DEF(4, 4, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(8, 8, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(8, 4, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(4, 8, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(16, 16, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(16, 8, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(8, 16, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(16, 12, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(12, 16, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(16, 4, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(4, 16, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(32, 32, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(32, 16, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(16, 32, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(32, 24, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(24, 32, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(32, 8, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(8, 32, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(64, 64, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(64, 32, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(32, 64, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(64, 48, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(48, 64, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(64, 16, cpu); \
+    SETUP_PIXEL_SSE_SP_DEF(16, 64, cpu);
+
 namespace x265 {
 // private x265 namespace
 
@@ -648,31 +678,7 @@
         p.sse_pp[LUMA_64x48] = x265_pixel_ssd_64x48_sse4;
         p.sse_pp[LUMA_64x64] = x265_pixel_ssd_64x64_sse4;
 
-        p.sse_sp[LUMA_4x4] = x265_pixel_ssd_sp_4x4_sse4;
-        p.sse_sp[LUMA_4x8] = x265_pixel_ssd_sp_4x8_sse4;
-        p.sse_sp[LUMA_4x16] = x265_pixel_ssd_sp_4x16_sse4;
-        p.sse_sp[LUMA_8x4] = x265_pixel_ssd_sp_8x4_sse4;
-        p.sse_sp[LUMA_8x8] = x265_pixel_ssd_sp_8x8_sse4;
-        p.sse_sp[LUMA_8x16] = x265_pixel_ssd_sp_8x16_sse4;
-        p.sse_sp[LUMA_8x32] = x265_pixel_ssd_sp_8x32_sse4;
-        p.sse_sp[LUMA_12x16] = x265_pixel_ssd_sp_12x16_sse4;
-        p.sse_sp[LUMA_16x4] = x265_pixel_ssd_sp_16x4_sse4;
-        p.sse_sp[LUMA_16x8] = x265_pixel_ssd_sp_16x8_sse4;
-        p.sse_sp[LUMA_16x12] = x265_pixel_ssd_sp_16x12_sse4;
-        p.sse_sp[LUMA_16x16] = x265_pixel_ssd_sp_16x16_sse4;
-        p.sse_sp[LUMA_16x32] = x265_pixel_ssd_sp_16x32_sse4;
-        p.sse_sp[LUMA_16x64] = x265_pixel_ssd_sp_16x64_sse4;
-        p.sse_sp[LUMA_24x32] = x265_pixel_ssd_sp_24x32_sse4;
-        p.sse_sp[LUMA_32x8] = x265_pixel_ssd_sp_32x8_sse4;
-        p.sse_sp[LUMA_32x16] = x265_pixel_ssd_sp_32x16_sse4;
-        p.sse_sp[LUMA_32x24] = x265_pixel_ssd_sp_32x24_sse4;
-        p.sse_sp[LUMA_32x32] = x265_pixel_ssd_sp_32x32_sse4;
-        p.sse_sp[LUMA_32x64] = x265_pixel_ssd_sp_32x64_sse4;
-        p.sse_sp[LUMA_48x64] = x265_pixel_ssd_sp_48x64_sse4;
-        p.sse_sp[LUMA_64x16] = x265_pixel_ssd_sp_64x16_sse4;
-        p.sse_sp[LUMA_64x32] = x265_pixel_ssd_sp_64x32_sse4;
-        p.sse_sp[LUMA_64x48] = x265_pixel_ssd_sp_64x48_sse4;
-        p.sse_sp[LUMA_64x64] = x265_pixel_ssd_sp_64x64_sse4;
+        LUMA_SSE_SP(_sse4);
 
         CHROMA_PIXELSUB_PS(_sse4);
 
diff -r 7a0fe2f90743 -r f0d2ef33a0bd source/common/x86/pixel.h
--- a/source/common/x86/pixel.h Thu Nov 28 14:58:39 2013 +0530
+++ b/source/common/x86/pixel.h Thu Nov 28 15:17:56 2013 +0530
@@ -62,6 +62,9 @@
 #define DECL_X1_SS(name, suffix) \
     DECL_PIXELS(int, name, suffix, (int16_t *, intptr_t, int16_t *, intptr_t))
 
+#define DECL_X1_SP(name, suffix) \
+    DECL_PIXELS(int, name, suffix, (int16_t *, intptr_t, pixel *, intptr_t))
+
 #define DECL_X4(name, suffix) \
     DECL_PIXELS(void, name ## _x3, suffix, (pixel *, pixel *, pixel *, pixel *, intptr_t, int *)) \
     DECL_PIXELS(void, name ## _x4, suffix, (pixel *, pixel *, pixel *, pixel *, pixel *, intptr_t, int *))
@@ -98,6 +101,7 @@
 DECL_X1_SS(ssd_ss, avx)
 DECL_X1_SS(ssd_ss, xop)
 DECL_X1_SS(ssd_ss, avx2)
+DECL_X1_SP(ssd_sp, sse4)
 DECL_X1(satd, mmx2)
 DECL_X1(satd, sse2)
 DECL_X1(satd, ssse3)
@@ -401,30 +405,4 @@
 void x265_dequant_normal_sse4(const int32_t* quantCoef, int32_t* coef, int num, int scale, int shift);
 void x265_weight_pp_sse4(pixel *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
 void x265_weight_sp_sse4(int16_t *src, pixel *dst, intptr_t srcStride, intptr_t dstStride, int width, int height, int w0, int round, int shift, int offset);
-
-int x265_pixel_ssd_sp_4x4_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_4x8_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_4x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_8x4_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_8x8_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_8x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_8x32_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_12x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x4_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x8_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x12_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x32_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_16x64_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_24x32_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_32x8_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_32x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_32x24_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_32x32_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_32x64_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_48x64_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_64x16_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_64x32_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_64x48_sse4(int16_t *, intptr_t, pixel *, intptr_t);
-int x265_pixel_ssd_sp_64x64_sse4(int16_t *, intptr_t, pixel *, intptr_t);
 #endif // ifndef X265_I386_PIXEL_H
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel