On Nov 18, 2013, at 9:23 AM, [email protected] wrote: > # HG changeset patch > # User Praveen Tiwari > # Date 1384788209 -19800 > # Node ID 59646d515e79b4d0f9a3a72c77c7af17a83bf3d9 > # Parent b353d170c54f0e33a8869c413be226a48deb1f5c > added csp support for blpckcopy_ps > > diff -r b353d170c54f -r 59646d515e79 source/common/pixel.cpp > --- a/source/common/pixel.cpp Mon Nov 18 19:34:07 2013 +0530 > +++ b/source/common/pixel.cpp Mon Nov 18 20:53:29 2013 +0530 > @@ -837,7 +837,7 @@ > #define CHROMA(W, H) \ > p.chroma_copy_pp[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_pp_c<W, H>; \ > p.chroma_copy_sp[CHROMA_ ## W ## x ## H] = blockcopy_sp_c<W, H>; \ > - p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, H>;\ > + p.chroma_copy_ps[CSP_I420][CHROMA_ ## W ## x ## H] = blockcopy_ps_c<W, > H>;\ > p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = pixel_sub_ps_c<W, H>; > > #define LUMA(W, H) \ > diff -r b353d170c54f -r 59646d515e79 source/common/primitives.h > --- a/source/common/primitives.h Mon Nov 18 19:34:07 2013 +0530 > +++ b/source/common/primitives.h Mon Nov 18 20:53:29 2013 +0530 > @@ -247,7 +247,7 @@ > copy_sp_t luma_copy_sp[NUM_LUMA_PARTITIONS]; > copy_sp_t chroma_copy_sp[NUM_CHROMA_PARTITIONS]; > copy_ps_t luma_copy_ps[NUM_LUMA_PARTITIONS]; > - copy_ps_t chroma_copy_ps[NUM_CHROMA_PARTITIONS]; > + copy_ps_t chroma_copy_ps[NUM_CSP][NUM_CHROMA_PARTITIONS]; > > pixel_sub_ps_t luma_sub_ps[NUM_LUMA_PARTITIONS]; > pixel_sub_ps_t chroma_sub_ps[NUM_CHROMA_PARTITIONS]; > diff -r b353d170c54f -r 59646d515e79 source/common/x86/asm-primitives.cpp > --- a/source/common/x86/asm-primitives.cpp Mon Nov 18 19:34:07 2013 +0530 > +++ b/source/common/x86/asm-primitives.cpp Mon Nov 18 20:53:29 2013 +0530 > @@ -141,7 +141,6 @@ > p.chroma_hps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_horiz_ps_ ## W ## x > ## H ## cpu; \ > p.chroma_vpp[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_pp_ ## W ## x ## > H ## cpu; \ > p.chroma_vps[CHROMA_ ## W ## x ## H] = x265_interp_4tap_vert_ps_ ## W 
## x ## > H ## cpu; \ > - p.chroma_copy_ps[CHROMA_ ## W ## x ## H] = x265_blockcopy_ps_ ## W ## x > ## H ## cpu; \ > p.chroma_sub_ps[CHROMA_ ## W ## x ## H] = x265_pixel_sub_ps_ ## W ## x ## H > ## cpu; > > #define SETUP_CHROMA_SP_FUNC_DEF(W, H, cpu) \ > @@ -380,6 +379,36 @@ > SETUP_LUMA_BLOCKCOPY_FUNC_DEF(64, 16, cpu); \ > SETUP_LUMA_BLOCKCOPY_FUNC_DEF(16, 64, cpu); > > +#define SETUP_CHROMA_FROM_LUMA_SSE4(W1, H1, W2, H2, cpu) \ > + p.chroma_copy_ps[X265_CSP_I420][LUMA_ ## W1 ## x ## H1] = > x265_blockcopy_ps_ ## W2 ## x ## H2 ## cpu; > + > +// For X265_CSP_I420 chroma width and height will be half of luma width and > height > +#define CHROMA_BLOCKCOPY_SSE4(cpu) \
When the macro accepts a cpu type argument, adding SSE4 to the name is
redundant (and confusing).
There should probably be a generic I420 macro that maps luma blocks to I420
blocks, so that adding more color spaces does not multiply the amount of code in this file.
> + SETUP_CHROMA_FROM_LUMA_SSE4(8, 8, 4, 4, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(8, 4, 4, 2, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(4, 8, 2, 4, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(16, 16, 8, 8, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(16, 8, 8, 4, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(8, 16, 4, 8, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(16, 12, 8, 6, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(12, 16, 6, 8, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(16, 4, 8, 2, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(4, 16, 2, 8, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(32, 32, 16, 16, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(32, 16, 16, 8, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(16, 32, 8, 16, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(32, 24, 16, 12, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(24, 32, 12, 16, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(32, 8, 16, 4, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(8, 32, 4, 16, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(64, 64, 32, 32, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(64, 32, 32, 16, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(32, 64, 16, 32, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(64, 48, 32, 24, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(48, 64, 24, 32, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(64, 16, 32, 8, cpu); \
> + SETUP_CHROMA_FROM_LUMA_SSE4(16, 64, 8, 32, cpu);
> +
> using namespace x265;
>
> namespace {
> @@ -591,6 +620,7 @@
> CHROMA_FILTERS(_sse4);
> LUMA_FILTERS(_sse4);
> HEVC_SATD(sse4);
> + CHROMA_BLOCKCOPY_SSE4(_sse4);
> p.chroma_copy_sp[CHROMA_2x4] = x265_blockcopy_sp_2x4_sse4;
> p.chroma_copy_sp[CHROMA_2x8] = x265_blockcopy_sp_2x8_sse4;
> p.chroma_copy_sp[CHROMA_6x8] = x265_blockcopy_sp_6x8_sse4;
> diff -r b353d170c54f -r 59646d515e79 source/test/pixelharness.cpp
> --- a/source/test/pixelharness.cpp Mon Nov 18 19:34:07 2013 +0530
> +++ b/source/test/pixelharness.cpp Mon Nov 18 20:53:29 2013 +0530
> @@ -763,12 +763,15 @@
> }
> }
>
> - if (opt.chroma_copy_ps[part])
> + for(int i = 0; i < NUM_CSP; i++)
white-space: missing space after `for` (should be `for (int i = 0; ...`)
> {
> - if (!check_block_copy_ps(ref.chroma_copy_ps[part],
> opt.chroma_copy_ps[part]))
> + if (opt.chroma_copy_ps[i][part])
> {
> - printf("chroma_copy_ps[%s] failed\n", chromaPartStr[part]);
> - return false;
> + if (!check_block_copy_ps(ref.chroma_copy_ps[i][part],
> opt.chroma_copy_ps[i][part]))
> + {
> + printf("chroma_copy_ps[%s][%s] failed\n",
> colorSpaceNames[i], chromaPartStr[part]);
> + return false;
> + }
> }
> }
>
> @@ -1051,10 +1054,13 @@
> REPORT_SPEEDUP(opt.luma_copy_ps[part], ref.luma_copy_ps[part], sbuf1, 64,
> pbuf1, 128);
> }
>
> - if (opt.chroma_copy_ps[part])
> + for (int i = 0; i < NUM_CSP; i++)
> {
> - printf("ccpy_ps[%s]", chromaPartStr[part]);
> - REPORT_SPEEDUP(opt.chroma_copy_ps[part], ref.chroma_copy_ps[part],
> sbuf1, 64, pbuf1, 128);
> + if (opt.chroma_copy_ps[i][part])
> + {
> + printf("ccpy_ps[%s][%s]", colorSpaceNames[i],
> chromaPartStr[part]);
> + REPORT_SPEEDUP(opt.chroma_copy_ps[i][part],
> ref.chroma_copy_ps[i][part], sbuf1, 64, pbuf1, 128);
> + }
> }
>
> if (opt.luma_sub_ps[part])
> _______________________________________________
> x265-devel mailing list
> [email protected]
> https://mailman.videolan.org/listinfo/x265-devel
signature.asc
Description: Message signed with OpenPGP using GPGMail
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
