# HG changeset patch # User Dnyaneshwar G <dnyanesh...@multicorewareinc.com> # Date 1435323067 -19800 # Fri Jun 26 18:21:07 2015 +0530 # Node ID 44b574b61b29a3cfba99e8f0d06622e44a86df17 # Parent d64227e54233d1646c55bcb4b0b831e5340009ed asm: intra_filter4x4 sse4 code and added testbench support, improved 357c->141c over C code
diff -r d64227e54233 -r 44b574b61b29 source/common/x86/asm-primitives.cpp --- a/source/common/x86/asm-primitives.cpp Thu Jun 25 16:25:51 2015 +0530 +++ b/source/common/x86/asm-primitives.cpp Fri Jun 26 18:21:07 2015 +0530 @@ -2453,6 +2453,8 @@ p.weight_pp = PFX(weight_pp_sse4); p.weight_sp = PFX(weight_sp_sse4); + p.cu[BLOCK_4x4].intra_filter = PFX(intra_filter_4x4_sse4); + ALL_LUMA_TU_S(intra_pred[PLANAR_IDX], intra_pred_planar, sse4); ALL_LUMA_TU_S(intra_pred[DC_IDX], intra_pred_dc, sse4); ALL_LUMA_TU(intra_pred_allangs, all_angs_pred, sse4); diff -r d64227e54233 -r 44b574b61b29 source/common/x86/intrapred.h --- a/source/common/x86/intrapred.h Thu Jun 25 16:25:51 2015 +0530 +++ b/source/common/x86/intrapred.h Fri Jun 26 18:21:07 2015 +0530 @@ -66,6 +66,7 @@ #define DECL_ALL(cpu) \ FUNCDEF_TU(void, all_angs_pred, cpu, pixel *dest, pixel *refPix, pixel *filtPix, int bLuma); \ + FUNCDEF_TU(void, intra_filter, cpu, const pixel *samples, pixel *filtered); \ DECL_ANGS(4, cpu); \ DECL_ANGS(8, cpu); \ DECL_ANGS(16, cpu); \ diff -r d64227e54233 -r 44b574b61b29 source/common/x86/intrapred8.asm --- a/source/common/x86/intrapred8.asm Thu Jun 25 16:25:51 2015 +0530 +++ b/source/common/x86/intrapred8.asm Fri Jun 26 18:21:07 2015 +0530 @@ -30,6 +30,9 @@ intra_pred_shuff_0_8: times 2 db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 intra_pred_shuff_15_0: times 2 db 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +intra_filter4_shuf0: db 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ,11, 12, 13 +intra_filter4_shuf1: db 14,15,0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ,11, 12, 13 + pb_0_8 times 8 db 0, 8 pb_unpackbw1 times 2 db 1, 8, 2, 8, 3, 8, 4, 8 pb_swap8: times 2 db 7, 6, 5, 4, 3, 2, 1, 0 @@ -18276,3 +18279,44 @@ INTRA_PRED_STORE_4x4 RET + +;----------------------------------------------------------------------------------- +; void intra_filter_NxN(const pixel* references, pixel* filtered) +;----------------------------------------------------------------------------------- +INIT_XMM 
sse4 +cglobal intra_filter_4x4, 2,4,5 + mov r2b, byte [r0 + 8] ; topLast + mov r3b, byte [r0 + 16] ; leftLast + + ; filtering top + pmovzxbw m0, [r0 + 0] + pmovzxbw m1, [r0 + 8] + pmovzxbw m2, [r0 + 16] + + pshufb m4, m0, [intra_filter4_shuf0] ; [6 5 4 3 2 1 0 1] samples[i - 1] + palignr m3, m1, m0, 4 + pshufb m3, [intra_filter4_shuf1] ; [8 7 6 5 4 3 2 9] samples[i + 1] + + psllw m0, 1 + paddw m4, m3 + paddw m0, m4 + paddw m0, [pw_2] + psrlw m0, 2 + + ; filtering left + palignr m4, m1, m1, 14 ; [14 13 12 11 10 9 8 15] samples[i - 1] + pinsrb m4, [r0], 2 ; [14 13 12 11 10 9 0 15] samples[i - 1], with samples[0] inserted + palignr m3, m2, m1, 4 + pshufb m3, [intra_filter4_shuf1] ; [16 15 14 13 12 11 10 17] samples[i + 1] + + psllw m1, 1 + paddw m4, m3 + paddw m1, m4 + paddw m1, [pw_2] + psrlw m1, 2 + packuswb m0, m1 + + movu [r1], m0 + mov [r1 + 8], r2b ; topLast + mov [r1 + 16], r3b ; leftLast + RET diff -r d64227e54233 -r 44b574b61b29 source/test/intrapredharness.cpp --- a/source/test/intrapredharness.cpp Thu Jun 25 16:25:51 2015 +0530 +++ b/source/test/intrapredharness.cpp Fri Jun 26 18:21:07 2015 +0530 @@ -31,6 +31,16 @@ { for (int i = 0; i < INPUT_SIZE; i++) pixel_buff[i] = rand() % PIXEL_MAX; + + /* [0] --- Random values + * [1] --- Minimum + * [2] --- Maximum */ + for (int i = 0; i < BUFFSIZE; i++) + { + pixel_test_buff[0][i] = rand() % PIXEL_MAX; + pixel_test_buff[1][i] = PIXEL_MIN; + pixel_test_buff[2][i] = PIXEL_MAX; + } } bool IntraPredHarness::check_dc_primitive(intra_pred_t ref, intra_pred_t opt, int width) @@ -177,6 +187,27 @@ return true; } +bool IntraPredHarness::check_intra_filter_primitive(const intra_filter_t ref, const intra_filter_t opt) +{ + memset(pixel_out_c, 0, 64 * 64 * sizeof(pixel)); + memset(pixel_out_vec, 0, 64 * 64 * sizeof(pixel)); + int j = 0; + + for (int i = 0; i < 100; i++) + { + int index = rand() % TEST_CASES; + + ref(pixel_test_buff[index] + j, pixel_out_c); + checked(opt, pixel_test_buff[index] + j, pixel_out_vec); + + if (memcmp(pixel_out_c, pixel_out_vec, 64 * 64 * sizeof(pixel))) + return 
false; + + reportfail(); + j += FENC_STRIDE; + } + return true; +} bool IntraPredHarness::testCorrectness(const EncoderPrimitives& ref, const EncoderPrimitives& opt) { for (int i = BLOCK_4x4; i <= BLOCK_32x32; i++) @@ -213,6 +244,14 @@ return false; } } + if (opt.cu[i].intra_filter) + { + if (!check_intra_filter_primitive(ref.cu[i].intra_filter, opt.cu[i].intra_filter)) + { + printf("intra_filter_%dx%d failed\n", size, size); + return false; + } + } } return true; @@ -268,5 +307,10 @@ pixel_out_vec, FENC_STRIDE, pixel_buff + srcStride, mode, bFilter); } } + if (opt.cu[i].intra_filter) + { + printf("intra_filter_%dx%d", size, size); + REPORT_SPEEDUP(opt.cu[i].intra_filter, ref.cu[i].intra_filter, pixel_buff, pixel_out_c); + } } } diff -r d64227e54233 -r 44b574b61b29 source/test/intrapredharness.h --- a/source/test/intrapredharness.h Thu Jun 25 16:25:51 2015 +0530 +++ b/source/test/intrapredharness.h Fri Jun 26 18:21:07 2015 +0530 @@ -34,7 +34,15 @@ enum { INPUT_SIZE = 4 * 65 * 65 * 100 }; enum { OUTPUT_SIZE = 64 * FENC_STRIDE }; enum { OUTPUT_SIZE_33 = 33 * OUTPUT_SIZE }; + enum { TEST_CASES = 3 }; + enum { INCR = 32 }; + enum { STRIDE = 64 }; + enum { ITERS = 100 }; + enum { MAX_HEIGHT = 64 }; + enum { PAD_ROWS = 64 }; + enum { BUFFSIZE = STRIDE * (MAX_HEIGHT + PAD_ROWS) + INCR * ITERS }; + pixel pixel_test_buff[TEST_CASES][BUFFSIZE]; ALIGN_VAR_16(pixel, pixel_buff[INPUT_SIZE]); pixel pixel_out_c[OUTPUT_SIZE]; pixel pixel_out_vec[OUTPUT_SIZE]; @@ -45,6 +53,7 @@ bool check_planar_primitive(intra_pred_t ref, intra_pred_t opt, int width); bool check_angular_primitive(const intra_pred_t ref[], const intra_pred_t opt[], int size); bool check_allangs_primitive(const intra_allangs_t ref, const intra_allangs_t opt, int size); + bool check_intra_filter_primitive(const intra_filter_t ref, const intra_filter_t opt); public: _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel