Hi,

On Sat, Mar 18, 2017 at 3:50 PM, Mirage Abeysekara <mirage...@cse.mrt.ac.lk> wrote:
> ---
> libavcodec/x86/h264_intrapred.asm | 37 ++++++++++++++++++++++++++++++
> ++++++
> libavcodec/x86/h264_intrapred_init.c | 7 +++++++
> 2 files changed, 44 insertions(+)
>
> diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_
> intrapred.asm
> index c88d91b..0f3b462 100644
> --- a/libavcodec/x86/h264_intrapred.asm
> +++ b/libavcodec/x86/h264_intrapred.asm
> @@ -268,6 +268,43 @@ cglobal pred16x16_tm_vp8_8, 2,6,6
> jg .loop
> REP_RET
>
> +%if HAVE_AVX2_EXTERNAL
> +INIT_YMM avx2
> +cglobal pred16x16_tm_vp8_8, 2, 4, 5, dst, stride, stride3, iteration
> + sub dstq, strideq
> + pmovzxbw m0, [dstq]
> + vpbroadcastb xm1, [r0-1]
> + pmovzxbw m1, xm1
> + psubw m0, m1
> + mov iterationd, 4
> + lea stride3q, [strideq*3]
> +.loop:
> + vpbroadcastb xm1, [dstq+strideq*1-1]
> + vpbroadcastb xm2, [dstq+strideq*2-1]
> + vpbroadcastb xm3, [dstq+stride3q-1]
> + vpbroadcastb xm4, [dstq+strideq*4-1]
> + pmovzxbw m1, xm1
> + pmovzxbw m2, xm2
> + pmovzxbw m3, xm3
> + pmovzxbw m4, xm4
> + paddw m1, m0
> + paddw m2, m0
> + paddw m3, m0
> + paddw m4, m0
> + vpackuswb m1, m1, m2
> + vpackuswb m3, m3, m4
> + vpermq m1, m1, q3120
> + vpermq m3, m3, q3120
> + movdqa [dstq+strideq*1], xm1
> + vextracti128 [dstq+strideq*2], m1, 1
> + movdqa [dstq+stride3q*1], xm3
> + vextracti128 [dstq+strideq*4], m3, 1
> + lea dstq, [dstq+strideq*4]
> + dec iterationd
> + jg .loop
> + REP_RET
> +%endif
> +
> ;-----------------------------------------------------------
> ------------------
> ; void ff_pred16x16_plane_*_8(uint8_t *src, int stride)
> ;-----------------------------------------------------------
> ------------------
> diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_
> intrapred_init.c
> index 528b92e..bdd5125 100644
> --- a/libavcodec/x86/h264_intrapred_init.c
> +++ b/libavcodec/x86/h264_intrapred_init.c
> @@ -127,6 +127,7 @@ PRED16x16(plane_svq3, 8, ssse3)
> PRED16x16(tm_vp8, 8, mmx)
> PRED16x16(tm_vp8, 8, mmxext)
> PRED16x16(tm_vp8, 8, sse2)
> +PRED16x16(tm_vp8, 8, avx2)
>
> PRED8x8(top_dc, 8, mmxext)
> PRED8x8(dc_rv40, 8, mmxext)
> @@ -323,6 +324,12 @@ av_cold void ff_h264_pred_init_x86(H264PredContext
> *h, int codec_id,
> }
> }
> }
> +
> + if(EXTERNAL_AVX2(cpu_flags)){
> + if (codec_id == AV_CODEC_ID_VP8) {
> + h->pred16x16[PLANE_PRED8x8 ] =
> ff_pred16x16_tm_vp8_8_avx2;
> + }
> + }
> } else if (bit_depth == 10) {
> if (EXTERNAL_MMXEXT(cpu_flags)) {
> h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext;
> --
> 2.7.4

Pushed.

Ronald
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel