Steve, I have added partition-based calls for all sizes of the chroma function. Can you tell me whether this is the right direction? If so, I will replace the luma functions similarly. I have a few questions about the approach to combining functions, as you described: [12:29:36 PM] Steve Borho: if YFrac is 0, it would just do lumaH_pp. If XFrac is 0, it would just do lumaV_pp. else it does src -> lumaH_ps -> temp -> lumaV_sp -> dst
Are YFrac and XFrac template parameters? I think we need to combine the C code of both functions into a single function and pass an extra temp buffer as an argument. Would we also have to modify the intrinsic and asm code? Regards, Praveen Tiwari On Wed, Oct 9, 2013 at 7:24 PM, <[email protected]> wrote: > # HG changeset patch > # User Praveen Tiwari > # Date 1381326812 -19800 > # Node ID 37b42347a5baefe11822888d385e4c8422f4f3f3 > # Parent fc7fbdd18bc0d6d7f98180332e065d83c054fe02 > Chroma function, partion based call > > diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/ipfilter.cpp > --- a/source/common/ipfilter.cpp Wed Oct 09 00:00:10 2013 -0500 > +++ b/source/common/ipfilter.cpp Wed Oct 09 19:23:32 2013 +0530 > @@ -34,6 +34,56 @@ > #pragma warning(disable: 4127) // conditional expression is constant, > typical for templated functions > #endif > > +#define > SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W2(FUNC_PREFIX, WIDTH) > \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4] = > FUNC_PREFIX<4, WIDTH, 4>; \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8] = > FUNC_PREFIX<4, WIDTH, 8>; > + > +#define > SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W4(FUNC_PREFIX, WIDTH) > \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x2] = > FUNC_PREFIX<4, WIDTH, 2>; \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4] = > FUNC_PREFIX<4, WIDTH, 4>; \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8] = > FUNC_PREFIX<4, WIDTH, 8>; \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16] = > FUNC_PREFIX<4, WIDTH, 16>; > + > +#define > SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W6(FUNC_PREFIX, WIDTH) > \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8] = > FUNC_PREFIX<4, WIDTH, 8>; > + > +#define > SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W8(FUNC_PREFIX, WIDTH) > \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x2] = > 
FUNC_PREFIX<4, WIDTH, 2>; \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4] = > FUNC_PREFIX<4, WIDTH, 4>; \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x6] = > FUNC_PREFIX<4, WIDTH, 6>; \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8] = > FUNC_PREFIX<4, WIDTH, 8>; \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16] = > FUNC_PREFIX<4, WIDTH, 16>; \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32] = > FUNC_PREFIX<4, WIDTH, 32>; > + > +#define > SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W12(FUNC_PREFIX, > WIDTH) \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16] = > FUNC_PREFIX<4, WIDTH, 16>; > + > +#define > SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W16(FUNC_PREFIX, > WIDTH) \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x4] = > FUNC_PREFIX<4, WIDTH, 4>; \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8] = > FUNC_PREFIX<4, WIDTH, 8>; \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x12] = > FUNC_PREFIX<4, WIDTH, 12>; \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16] = > FUNC_PREFIX<4, WIDTH, 16>; \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32] = > FUNC_PREFIX<4, WIDTH, 32>; > + > +#define > SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W24(FUNC_PREFIX, > WIDTH) \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32] = > FUNC_PREFIX<4, WIDTH, 32>; > + > +#define > SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W32(FUNC_PREFIX, > WIDTH) \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x8] = > FUNC_PREFIX<4, WIDTH, 8>; \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x16] = > FUNC_PREFIX<4, WIDTH, 16>; \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x24] = > FUNC_PREFIX<4, WIDTH, 24>; \ > + p.FUNC_PREFIX[CHROMA_HORIZONTAL_PP_PARTITION_ ## WIDTH ## x32] = > FUNC_PREFIX<4, WIDTH, 32>; > 
+ > +#define SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE(FUNC_PREFIX) \ > + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W2(FUNC_PREFIX, > 2) \ > + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W4(FUNC_PREFIX, > 4) \ > + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W6(FUNC_PREFIX, > 6) \ > + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W8(FUNC_PREFIX, > 8) \ > + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W12(FUNC_PREFIX, > 12) \ > + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W16(FUNC_PREFIX, > 16) \ > + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W24(FUNC_PREFIX, > 24) \ > + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE_SUBSET_W32(FUNC_PREFIX, > 32) \ > + > namespace { > template<int N> > void filterVertical_s_p(short *src, intptr_t srcStride, pixel *dst, > intptr_t dstStride, int width, int height, short const *coeff) > @@ -88,8 +138,8 @@ > } > } > > -template<int N> > -void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst, > intptr_t dstStride, int width, int height, short const *coeff) > +template<int N, int width, int height> > +void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst, > intptr_t dstStride, short const *coeff) > { > int cStride = 1; > > @@ -500,11 +550,13 @@ > > void Setup_C_IPFilterPrimitives(EncoderPrimitives& p) > { > + > + SET_CHROMA_HORIZONTAL_PP_FUNC_PRIMITIVE_TABLE(filterHorizontal_p_p) > + > p.ipfilter_pp[FILTER_H_P_P_8] = filterHorizontal_p_p<8>; > p.ipfilter_ps[FILTER_H_P_S_8] = filterHorizontal_p_s<8>; > p.ipfilter_ps[FILTER_V_P_S_8] = filterVertical_p_s<8>; > p.ipfilter_sp[FILTER_V_S_P_8] = filterVertical_s_p<8>; > - p.ipfilter_pp[FILTER_H_P_P_4] = filterHorizontal_p_p<4>; > p.ipfilter_ps[FILTER_H_P_S_4] = filterHorizontal_p_s<4>; > p.ipfilter_ps[FILTER_V_P_S_4] = filterVertical_p_s<4>; > p.ipfilter_sp[FILTER_V_S_P_4] = filterVertical_s_p<4>; > diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/primitives.h > --- a/source/common/primitives.h Wed 
Oct 09 00:00:10 2013 -0500 > +++ b/source/common/primitives.h Wed Oct 09 19:23:32 2013 +0530 > @@ -89,6 +89,17 @@ > NUM_PARTITIONS > }; > > +enum ChromaPartions > +{ > + CHROMA_HORIZONTAL_PP_PARTITION_2x4, > CHROMA_HORIZONTAL_PP_PARTITION_2x8, CHROMA_HORIZONTAL_PP_PARTITION_4x2, > CHROMA_HORIZONTAL_PP_PARTITION_4x4, > + CHROMA_HORIZONTAL_PP_PARTITION_4x8, > CHROMA_HORIZONTAL_PP_PARTITION_4x16, CHROMA_HORIZONTAL_PP_PARTITION_8x2, > CHROMA_HORIZONTAL_PP_PARTITION_8x4, > + CHROMA_HORIZONTAL_PP_PARTITION_8x6, > CHROMA_HORIZONTAL_PP_PARTITION_8x8, CHROMA_HORIZONTAL_PP_PARTITION_8x16, > CHROMA_HORIZONTAL_PP_PARTITION_8x32, > + CHROMA_HORIZONTAL_PP_PARTITION_6x8, > CHROMA_HORIZONTAL_PP_PARTITION_12x16, > CHROMA_HORIZONTAL_PP_PARTITION_16x4, CHROMA_HORIZONTAL_PP_PARTITION_16x8, > + CHROMA_HORIZONTAL_PP_PARTITION_16x12, > CHROMA_HORIZONTAL_PP_PARTITION_16x16, > CHROMA_HORIZONTAL_PP_PARTITION_16x32, > CHROMA_HORIZONTAL_PP_PARTITION_24x32, > + CHROMA_HORIZONTAL_PP_PARTITION_32x8, > CHROMA_HORIZONTAL_PP_PARTITION_32x16, > CHROMA_HORIZONTAL_PP_PARTITION_32x24, > CHROMA_HORIZONTAL_PP_PARTITION_32x32, > + NUM_CHROMA_HORIZONTAL_PP_PARTITIONS > +}; > + > enum SquareBlocks // Routines can be indexed using log2n(width) > { > BLOCK_4x4, > @@ -205,6 +216,8 @@ > typedef void (*ssim_4x4x2_core_t)(const pixel *pix1, intptr_t stride1, > const pixel *pix2, intptr_t stride2, ssim_t sums[2][4]); > typedef float (*ssim_end4_t)(ssim_t sum0[5][4], ssim_t sum1[5][4], int > width); > > +typedef void (*chromaFilterHoriz_pp)(pixel *src, intptr_t srcStride, > pixel *dst, intptr_t dstStride, const short *coeff); // Modified argument > list for chroma filter, removed width and height. > + > /* Define a structure containing function pointers to optimized encoder > * primitives. Each pointer can reference either an assembly routine, > * a vectorized primitive, or a C function. 
*/ > @@ -265,6 +278,8 @@ > downscale_t frame_init_lowres_core; > ssim_4x4x2_core_t ssim_4x4x2_core; > ssim_end4_t ssim_end_4; > + > + chromaFilterHoriz_pp > filterHorizontal_p_p[NUM_CHROMA_HORIZONTAL_PP_PARTITIONS]; > }; > > /* This copy of the table is what gets used by the encoder. > diff -r fc7fbdd18bc0 -r 37b42347a5ba source/common/vec/ipfilter-sse41.cpp > --- a/source/common/vec/ipfilter-sse41.cpp Wed Oct 09 00:00:10 2013 > -0500 > +++ b/source/common/vec/ipfilter-sse41.cpp Wed Oct 09 19:23:32 2013 > +0530 > @@ -1541,8 +1541,8 @@ > -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 > }; > > -template<int N> > -void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst, > intptr_t dstStride, int width, int height, short const *coeff) > +template<int N, int width, int height> > +void filterHorizontal_p_p(pixel *src, intptr_t srcStride, pixel *dst, > intptr_t dstStride, short const *coeff) > { > assert(X265_DEPTH == 8); > > @@ -1656,9 +1656,35 @@ > } > > namespace x265 { > +#define SETUP_PARTITION(W, H) \ > + p.filterHorizontal_p_p[CHROMA_HORIZONTAL_PP_PARTITION_##W##x##H] = > filterHorizontal_p_p##<4, W, H>; > + > void Setup_Vec_IPFilterPrimitives_sse41(EncoderPrimitives& p) > { > - p.ipfilter_pp[FILTER_H_P_P_4] = filterHorizontal_p_p<4>; > + SETUP_PARTITION(2, 4); > + SETUP_PARTITION(2, 8); > + SETUP_PARTITION(4, 2); > + SETUP_PARTITION(4, 4); > + SETUP_PARTITION(4, 8); > + SETUP_PARTITION(4, 16); > + SETUP_PARTITION(6, 8); > + SETUP_PARTITION(8, 2); > + SETUP_PARTITION(8, 4); > + SETUP_PARTITION(8, 6); > + SETUP_PARTITION(8, 8); > + SETUP_PARTITION(8, 16); > + SETUP_PARTITION(12, 16); > + SETUP_PARTITION(16, 4); > + SETUP_PARTITION(16, 8); > + SETUP_PARTITION(16, 12); > + SETUP_PARTITION(16, 16); > + SETUP_PARTITION(16, 32); > + SETUP_PARTITION(24, 32); > + SETUP_PARTITION(32, 8); > + SETUP_PARTITION(32, 16); > + SETUP_PARTITION(32, 24); > + SETUP_PARTITION(32, 32); > + > p.ipfilter_pp[FILTER_H_P_P_8] = filterHorizontal_p_p<8>; > > 
p.ipfilter_pp[FILTER_V_P_P_4] = filterVertical_p_p<4>; >
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
