On Wed, Oct 05, 2016 at 02:04:31PM +0200, Alexandra Hájková wrote: > --- a/libavcodec/x86/hevc_idct.asm > +++ b/libavcodec/x86/hevc_idct.asm > @@ -74,34 +286,578 @@ cglobal hevc_idct_%1x%1_dc_%2, 1, 2, 1, coeff, tmp > > INIT_XMM sse2 > +IDCT_DC_NL 8, %1 > +IDCT_DC 16, 4, %1 > +IDCT_DC 32, 16, %1 > +%if %1 == 8 > +TRANSPOSE_8x8 > +%if ARCH_X86_64 > +TRANSPOSE_16x16 > +TRANSPOSE_32x32 > +%endif > +%endif > +%define transpose_8x8 hevc_idct_transpose_8x8_sse2 > +%if ARCH_X86_64 > +%define transpose_16x16 hevc_idct_transpose_16x16_sse2 > +%define transpose_32x32 hevc_idct_transpose_32x32_sse2 > +IDCT_32x32 %1 > +IDCT_16x16 %1 > +%endif
There should be no need to redefine the transpose functions; just call the right one with the help of the cpuname macro. > --- a/libavcodec/x86/hevcdsp_init.c > +++ b/libavcodec/x86/hevcdsp_init.c > @@ -329,6 +361,11 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int > bit_depth) > #if HAVE_AVX_EXTERNAL > SET_QPEL_FUNCS(1, 1, 8, avx, hevc_qpel_hv); > SET_EPEL_FUNCS(1, 1, 8, avx, hevc_epel_hv); > + > + c->idct[0] = ff_hevc_idct_4x4_8_avx; > + c->idct[1] = ff_hevc_idct_8x8_8_avx; > + c->idct[2] = ff_hevc_idct_16x16_8_avx; > + c->idct[3] = ff_hevc_idct_32x32_8_avx; > #endif /* HAVE_AVX_EXTERNAL */ > } Only the parts that are explicitly ifdeffed above within this very file should be ifdeffed here. Add these assignments below the ifdef block, i.e. after the #endif. > @@ -354,6 +397,12 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int > bit_depth) > SET_EPEL_FUNCS(0, 1, 10, avx, ff_hevc_epel_h); > SET_EPEL_FUNCS(1, 0, 10, avx, ff_hevc_epel_v); > SET_EPEL_FUNCS(1, 1, 10, avx, hevc_epel_hv); > + > + c->idct[0] = ff_hevc_idct_4x4_10_avx; > + c->idct[1] = ff_hevc_idct_8x8_10_avx; > + c->idct[2] = ff_hevc_idct_16x16_10_avx; > + c->idct[3] = ff_hevc_idct_32x32_10_avx; > + > #endif /* HAVE_AVX_EXTERNAL */ > } > if (EXTERNAL_AVX2(cpu_flags)) { Same comment applies here. Diego _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel