Diego Biurrun <[email protected]> writes:

> ---
>
> The patch from yesterday in a cleaned-up form.  The configure part
> should now be sound.  Implementation of the new structure is only
> done for libavfilter and libavutil to showcase the changes.
> The rest can be done once this has been reviewed.
>
>  configure                      |  103 +++++++++++++++++++++++++++++++--------
>  libavfilter/x86/gradfun.c      |   28 +++--------
>  libavfilter/x86/yadif.c        |   24 ++-------
>  libavutil/x86/float_dsp_init.c |    6 +--
>  4 files changed, 98 insertions(+), 63 deletions(-)
>
> diff --git a/configure b/configure
> index d39db70..8b3a9b2 100755
> --- a/configure
> +++ b/configure
> @@ -625,6 +625,12 @@ add_host_ldflags(){
>      append host_ldflags $($host_ldflags_filter "$@")
>  }
>  
> +add_prefix(){
> +    prefix=$1
> +    shift
> +    for v; do echo ${prefix}${v}; done
> +}
> +
>  check_cmd(){
>      log "$@"
>      "$@" >> $logfile 2>&1
> @@ -1048,26 +1054,34 @@ ARCH_LIST='
>      x86_64
>  '
>  
> -ARCH_EXT_LIST='
> -    altivec
> +ARCH_EXT_LIST_X86='
>      amd3dnow
>      amd3dnowext
> +    avx
> +    fma4
> +    mmx
> +    mmxext
> +    sse
> +    sse2
> +    sse3
> +    sse4
> +    sse42
> +    ssse3
> +'
> +
> +ARCH_EXT_LIST="
> +    $ARCH_EXT_LIST_X86
> +    altivec
>      armv5te
>      armv6
>      armv6t2
>      armvfp
> -    avx
> -    fma4
>      mmi
> -    mmx
> -    mmxext
>      neon
>      ppc4xx
> -    sse
> -    ssse3
>      vfpv3
>      vis
> -'
> +"
>  
>  HAVE_LIST_PUB='
>      bigendian
> @@ -1207,7 +1221,7 @@ HAVE_LIST="
>      yasm
>  "
>  
> -# options emitted with CONFIG_ prefix but not available on command line
> +# options emitted with CONFIG_/HAVE_ prefix but not available on command line
>  CONFIG_EXTRA="

That comment change makes no sense.

>      aandcttables
>      ac3dsp
> @@ -1240,6 +1254,14 @@ CONFIG_EXTRA="
>      vp3dsp
>  "
>  
> +ARCH_EXT_LIST_X86_INLINE=$(add_prefix inline_ $ARCH_EXT_LIST_X86)
> +ARCH_EXT_LIST_X86_YASM=$(add_prefix yasm_  $ARCH_EXT_LIST_X86)

What, no vertical alignment?

> +HAVE_EXTRA="
> +    $ARCH_EXT_LIST_X86_INLINE
> +    $ARCH_EXT_LIST_X86_YASM
> +"

What is the purpose of HAVE_EXTRA?  Adding those things directly to
HAVE_LIST would have exactly the same effect.  HAVE_LIST is already not
available on the command line.

>  CMDLINE_SELECT="
>      $ARCH_EXT_LIST
>      $CONFIG_LIST
> @@ -1318,15 +1340,47 @@ ppc4xx_deps="ppc"
>  vis_deps="sparc"
>  
>  x86_64_suggest="cmov fast_cmov"
> +
>  amd3dnow_deps="mmx"
>  amd3dnowext_deps="amd3dnow"
>  mmx_deps="x86"
>  mmxext_deps="mmx"
> -sse_deps="mmx"
> -ssse3_deps="sse"
> -avx_deps="ssse3"
> +sse_deps="mmxext"
> +sse2_deps="sse"
> +sse3_deps="sse2"
> +ssse3_deps="sse3"
> +sse4_deps="ssse3"
> +sse42_deps="sse4"
> +avx_deps="sse42"
>  fma4_deps="avx"
>  
> +inline_amd3dnow_deps="inline_mmx amd3dnow"
> +inline_amd3dnowext_deps="inline_amd3dnow amd3dnowext"
> +inline_mmx_deps="inline_asm mmx"
> +inline_mmxext_deps="inline_mmx mmxext"
> +inline_sse_deps="inline_mmxext sse"
> +inline_sse2_deps="inline_sse sse2"
> +inline_sse3_deps="inline_sse2 sse3"
> +inline_ssse3_deps="inline_sse3 ssse3"
> +inline_sse4_deps="inline_ssse3 sse4"
> +inline_sse42_deps="inline_sse4 sse42"
> +inline_avx_deps="inline_sse42 avx"
> +inline_fma4_deps="inline_avx fma4"
> +
> +yasm_amd3dnow_deps="yasm_mmx amd3dnow"
> +yasm_amd3dnowext_deps="yasm_amd3dnow amd3dnowext"
> +yasm_mmx_deps="yasm mmx"
> +yasm_mmxext_deps="yasm_mmx mmxext"
> +yasm_sse_deps="yasm_mmxext sse"
> +yasm_sse2_deps="yasm_sse sse2"
> +yasm_sse3_deps="yasm_sse2 sse3"
> +yasm_ssse3_deps="yasm_sse3 ssse3"
> +yasm_sse4_deps="yasm_ssse3 sse4"
> +yasm_sse42_deps="yasm_sse4 sse42"
> +yasm_avx_deps="yasm_sse42 avx"
> +yasm_fma4_deps="yasm_avx fma4"

for ext in $ARCH_EXT_LIST_X86; do
    eval dep=\$${ext}_deps
    eval inline_${ext}_deps='"inline_$dep $ext"'
    eval yasm_${ext}_deps='"yasm_$dep $ext"'
done

Or something like that.

>  aligned_stack_if_any="ppc x86"
>  fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64"
>  fast_clz_if_any="alpha armv5te avr32 mips ppc x86"
> @@ -1863,7 +1917,10 @@ for n in $COMPONENT_LIST; do
>      eval ${n}_if_any="\$$v"
>  done
>  
> -enable $ARCH_EXT_LIST $ALL_TESTS
> +enable $ARCH_EXT_LIST                   \
> +       $ARCH_EXT_LIST_X86_INLINE        \
> +       $ARCH_EXT_LIST_X86_YASM          \
> +       $ALL_TESTS                       \

This will get out of hand quickly if we want to do something similar for
other architectures.  Could the same thing be achieved by in the loop I
propose above adding this line:

    eval ${ext}_suggest='"inline_$ext yasm_$ext"'

For future expansion, we might also want to use something more generic
than yasm in these variable names.

>  die_unknown(){
>      echo "Unknown option \"$1\"."
> @@ -2933,8 +2990,8 @@ EOF
>      check_inline_asm xmm_clobbers '"":::"%xmm0"'
>  
>      # check whether binutils is new enough to compile SSSE3/MMXEXT
> -    enabled ssse3  && check_inline_asm ssse3  '"pabsw %xmm0, %xmm0"'
> -    enabled mmxext && check_inline_asm mmxext '"pmaxub %mm0, %mm1"'
> +    check_inline_asm inline_ssse3  '"pabsw %xmm0, %xmm0"'
> +    check_inline_asm inline_mmxext '"pmaxub %mm0, %mm1"'
>  
>      if ! disabled_any asm mmx yasm; then
>          if check_cmd $yasmexe --version; then
> @@ -2955,8 +3012,8 @@ EOF
>  
>          check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
>              die "yasm not found, use --disable-yasm for a crippled build"
> -        check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
> -        check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4
> +        check_yasm "vextractf128 xmm0, ymm0, 0"      || disable yasm_avx
> +        check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable yasm_fma4
>          check_yasm "CPU amdnop" && enable cpunop
>      fi

These two hunks should be fine.

> @@ -3352,14 +3409,15 @@ fi
>  
>  enabled_any $THREADS_LIST      && enable threads
>  
> +enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; }

Moving that line is probably OK.

>  check_deps $CONFIG_LIST       \
>             $CONFIG_EXTRA      \
>             $HAVE_LIST         \
> +           $HAVE_EXTRA        \
>             $ALL_COMPONENTS    \
>             $ALL_TESTS         \
>  
> -enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; }
> -
>  ! enabled_any memalign posix_memalign aligned_malloc &&
>      enabled_any $need_memalign && enable memalign_hack
>  
> @@ -3606,7 +3664,10 @@ if enabled yasm; then
>  fi
>  
>  print_config ARCH_   "$config_files" $ARCH_LIST
> -print_config HAVE_   "$config_files" $HAVE_LIST
> +
> +print_config HAVE_   "$config_files" $HAVE_LIST         \
> +                                     $HAVE_EXTRA        \
> +
>  print_config CONFIG_ "$config_files" $CONFIG_LIST       \
>                                       $CONFIG_EXTRA      \
>                                       $ALL_COMPONENTS    \

See above about HAVE_EXTRA.

> diff --git a/libavfilter/x86/gradfun.c b/libavfilter/x86/gradfun.c
> index 01429ca..3ec1a2d 100644
> --- a/libavfilter/x86/gradfun.c
> +++ b/libavfilter/x86/gradfun.c
> @@ -24,12 +24,10 @@
>  #include "libavutil/x86/asm.h"
>  #include "libavfilter/gradfun.h"
>  
> -#if HAVE_INLINE_ASM

Why do you remove that line?

>  DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = 
> {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F};
>  DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = 
> {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
>  
> -#if HAVE_MMXEXT
> +#if HAVE_INLINE_MMXEXT
>  static void gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t 
> *dc, int width, int thresh, const uint16_t *dithers)
>  {
>      intptr_t x;
> @@ -77,7 +75,7 @@ static void gradfun_filter_line_mmx2(uint8_t *dst, uint8_t 
> *src, uint16_t *dc, i
>  }
>  #endif
>  
> -#if HAVE_SSSE3
> +#if HAVE_INLINE_SSSE3
>  static void gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t 
> *dc, int width, int thresh, const uint16_t *dithers)
>  {
>      intptr_t x;
> @@ -122,9 +120,9 @@ static void gradfun_filter_line_ssse3(uint8_t *dst, 
> uint8_t *src, uint16_t *dc,
>          :"memory"
>      );
>  }
> -#endif // HAVE_SSSE3
> +#endif /* HAVE_INLINE_SSSE3 */
>  
> -#if HAVE_SSE2
> +#if HAVE_INLINE_SSE2
>  static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t 
> *buf1, uint8_t *src, int src_linesize, int width)
>  {
>  #define BLURV(load)\
> @@ -165,26 +163,16 @@ static void gradfun_blur_line_sse2(uint16_t *dc, 
> uint16_t *buf, uint16_t *buf1,
>          BLURV("movdqa");
>      }
>  }
> -#endif /* HAVE_SSE2 */
> -
> -#endif /* HAVE_INLINE_ASM */
> +#endif /* HAVE_INLINE_SSE2 */
>  
>  av_cold void ff_gradfun_init_x86(GradFunContext *gf)
>  {
>      int cpu_flags = av_get_cpu_flags();
>  
> -#if HAVE_INLINE_ASM
> -#if HAVE_MMXEXT
> -    if (cpu_flags & AV_CPU_FLAG_MMXEXT)
> +    if (cpu_flags & AV_CPU_FLAG_MMXEXT && HAVE_INLINE_MMXEXT)
>          gf->filter_line = gradfun_filter_line_mmx2;

This will fail if HAVE_INLINE_MMXEXT == 0.

> -#endif
> -#if HAVE_SSSE3
> -    if (cpu_flags & AV_CPU_FLAG_SSSE3)
> +    if (cpu_flags & AV_CPU_FLAG_SSSE3 && HAVE_INLINE_SSSE3)
>          gf->filter_line = gradfun_filter_line_ssse3;
> -#endif
> -#if HAVE_SSE2
> -    if (cpu_flags & AV_CPU_FLAG_SSE2)
> +    if (cpu_flags & AV_CPU_FLAG_SSE2 && HAVE_INLINE_SSE2)
>          gf->blur_line = gradfun_blur_line_sse2;
> -#endif
> -#endif /* HAVE_INLINE_ASM */
>  }

[...]

> diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
> index d259a36..5c60e1d 100644
> --- a/libavutil/x86/float_dsp_init.c
> +++ b/libavutil/x86/float_dsp_init.c
> @@ -33,16 +33,14 @@ extern void ff_vector_fmac_scalar_avx(float *dst, const 
> float *src, float mul,
>  
>  void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
>  {
> -#if HAVE_YASM
>      int mm_flags = av_get_cpu_flags();
>  
> -    if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
> +    if (mm_flags & AV_CPU_FLAG_SSE && HAVE_YASM_SSE) {
>          fdsp->vector_fmul = ff_vector_fmul_sse;
>          fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
>      }
> -    if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) {
> +    if (mm_flags & AV_CPU_FLAG_AVX && HAVE_YASM_AVX) {
>          fdsp->vector_fmul = ff_vector_fmul_avx;
>          fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
>      }
> -#endif
>  }

Why do you remove the #if HAVE_YASM?  I imagine it's there for a reason.

-- 
Måns Rullgård
[email protected]
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to