---
 libavcodec/x86/dirac_dwt_10bit.asm    |  4 +++-
 libavcodec/x86/dirac_dwt_init_10bit.c | 22 ++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/libavcodec/x86/dirac_dwt_10bit.asm b/libavcodec/x86/dirac_dwt_10bit.asm index 681de5e1df..ae110d2945 100644 --- a/libavcodec/x86/dirac_dwt_10bit.asm +++ b/libavcodec/x86/dirac_dwt_10bit.asm @@ -24,7 +24,7 @@ SECTION_RODATA cextern pd_1 -pd_2: times 4 dd 2 +pd_2: times 8 dd 2 pd_8: times 4 dd 8 SECTION .text @@ -204,3 +204,5 @@ HAAR_VERTICAL INIT_YMM avx2 HAAR_HORIZONTAL HAAR_VERTICAL +LEGALL53_VERTICAL_HI +LEGALL53_VERTICAL_LO diff --git a/libavcodec/x86/dirac_dwt_init_10bit.c b/libavcodec/x86/dirac_dwt_init_10bit.c index e7e7534050..51d6eeae93 100644 --- a/libavcodec/x86/dirac_dwt_init_10bit.c +++ b/libavcodec/x86/dirac_dwt_init_10bit.c @@ -27,6 +27,8 @@ void ff_dd97_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int32_t *b3 void ff_legall53_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int width); void ff_legall53_vertical_lo_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int width); +void ff_legall53_vertical_hi_avx2(int32_t *b0, int32_t *b1, int32_t *b2, int width); +void ff_legall53_vertical_lo_avx2(int32_t *b0, int32_t *b1, int32_t *b2, int width); /* AVX2 entry points called by the C wrappers added below; presumably emitted by the LEGALL53_VERTICAL_HI/LO instantiations this patch adds after INIT_YMM avx2 - confirm against the macro definitions. NOTE(review): pd_2 is widened to 8 dwords (32 bytes) for these routines; if the asm reads it with an aligned 32-byte load, verify that the alignment given by the bare SECTION_RODATA is sufficient. */ void ff_horizontal_compose_haar_10bit_sse2(int32_t *b0, int32_t *b1, int width_align); void ff_horizontal_compose_haar_10bit_avx(int32_t *b0, int32_t *b1, int width_align); @@ -112,6 +114,22 @@ static void legall53_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); } +static void legall53_vertical_lo_avx2(int32_t *b0, int32_t *b1, int32_t *b2, int width) +{ /* Hands the first (width & ~7) elements to the AVX2 routine, then updates b1[] for the remaining 0-7 elements in C with COMPOSE_53iL0. */ + int i = width & ~7; /* width rounded down to a multiple of 8 */ + ff_legall53_vertical_lo_avx2(b0, b1, b2, i); + for(; i<width; i++) /* scalar tail for the elements not passed to the asm */ + b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); +} + +static void legall53_vertical_hi_avx2(int32_t *b0, int32_t *b1, int32_t *b2, int width) +{ /* Same split as the lo wrapper: AVX2 routine for the first (width & ~7) elements, COMPOSE_DIRAC53iH0 in C for the remaining 0-7. */ + int i = width & ~7; /* width rounded down to a multiple of 8 */ + ff_legall53_vertical_hi_avx2(b0, b1, b2, i); + for(; i<width; i++) /* scalar tail for the elements not passed to the asm */ + b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); +} + static void dd97_vertical_hi_sse2(int32_t *b0, int32_t *b1, int32_t *b2, int32_t *b3, int32_t *b4, 
int width) { @@ -161,6 +179,10 @@ av_cold void ff_spatial_idwt_init_10bit_x86(DWTContext *d, enum dwt_type type) if (EXTERNAL_AVX2(cpu_flags)) { switch (type) { + case DWT_DIRAC_LEGALL5_3: /* when EXTERNAL_AVX2(cpu_flags) holds, point both vertical_compose_h0 and vertical_compose_l0 at the AVX2 wrappers */ + d->vertical_compose_h0 = (void*)legall53_vertical_hi_avx2; + d->vertical_compose_l0 = (void*)legall53_vertical_lo_avx2; + break; case DWT_DIRAC_HAAR0: d->vertical_compose = (void*)vertical_compose_haar_avx2; break; -- 2.17.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel