Diego Biurrun <[email protected]> writes:
> ---
>
> The patch from yesterday in a cleaned-up form. The configure part
> should now be sound. Implementation of the new structure is only
> done for libavfilter and libavutil to showcase the changes.
> The rest can be done once this has been reviewed.
>
> configure | 103 +++++++++++++++++++++++++++++++--------
> libavfilter/x86/gradfun.c | 28 +++--------
> libavfilter/x86/yadif.c | 24 ++-------
> libavutil/x86/float_dsp_init.c | 6 +--
> 4 files changed, 98 insertions(+), 63 deletions(-)
>
> diff --git a/configure b/configure
> index d39db70..8b3a9b2 100755
> --- a/configure
> +++ b/configure
> @@ -625,6 +625,12 @@ add_host_ldflags(){
> append host_ldflags $($host_ldflags_filter "$@")
> }
>
> +add_prefix(){
> + prefix=$1
> + shift
> + for v; do echo ${prefix}${v}; done
> +}
> +
> check_cmd(){
> log "$@"
> "$@" >> $logfile 2>&1
> @@ -1048,26 +1054,34 @@ ARCH_LIST='
> x86_64
> '
>
> -ARCH_EXT_LIST='
> - altivec
> +ARCH_EXT_LIST_X86='
> amd3dnow
> amd3dnowext
> + avx
> + fma4
> + mmx
> + mmxext
> + sse
> + sse2
> + sse3
> + sse4
> + sse42
> + ssse3
> +'
> +
> +ARCH_EXT_LIST="
> + $ARCH_EXT_LIST_X86
> + altivec
> armv5te
> armv6
> armv6t2
> armvfp
> - avx
> - fma4
> mmi
> - mmx
> - mmxext
> neon
> ppc4xx
> - sse
> - ssse3
> vfpv3
> vis
> -'
> +"
>
> HAVE_LIST_PUB='
> bigendian
> @@ -1207,7 +1221,7 @@ HAVE_LIST="
> yasm
> "
>
> -# options emitted with CONFIG_ prefix but not available on command line
> +# options emitted with CONFIG_/HAVE_ prefix but not available on command line
> CONFIG_EXTRA="
That comment change makes no sense.
> aandcttables
> ac3dsp
> @@ -1240,6 +1254,14 @@ CONFIG_EXTRA="
> vp3dsp
> "
>
> +ARCH_EXT_LIST_X86_INLINE=$(add_prefix inline_ $ARCH_EXT_LIST_X86)
> +ARCH_EXT_LIST_X86_YASM=$(add_prefix yasm_ $ARCH_EXT_LIST_X86)
What, no vertical alignment?
> +HAVE_EXTRA="
> + $ARCH_EXT_LIST_X86_INLINE
> + $ARCH_EXT_LIST_X86_YASM
> +"
What is the purpose of HAVE_EXTRA? Adding those things directly to
HAVE_LIST would have exactly the same effect. HAVE_LIST is already not
available on the command line.
> CMDLINE_SELECT="
> $ARCH_EXT_LIST
> $CONFIG_LIST
> @@ -1318,15 +1340,47 @@ ppc4xx_deps="ppc"
> vis_deps="sparc"
>
> x86_64_suggest="cmov fast_cmov"
> +
> amd3dnow_deps="mmx"
> amd3dnowext_deps="amd3dnow"
> mmx_deps="x86"
> mmxext_deps="mmx"
> -sse_deps="mmx"
> -ssse3_deps="sse"
> -avx_deps="ssse3"
> +sse_deps="mmxext"
> +sse2_deps="sse"
> +sse3_deps="sse2"
> +ssse3_deps="sse3"
> +sse4_deps="ssse3"
> +sse42_deps="sse4"
> +avx_deps="sse42"
> fma4_deps="avx"
>
> +inline_amd3dnow_deps="inline_mmx amd3dnow"
> +inline_amd3dnowext_deps="inline_amd3dnow amd3dnowext"
> +inline_mmx_deps="inline_asm mmx"
> +inline_mmxext_deps="inline_mmx mmxext"
> +inline_sse_deps="inline_mmxext sse"
> +inline_sse2_deps="inline_sse sse2"
> +inline_sse3_deps="inline_sse2 sse3"
> +inline_ssse3_deps="inline_sse3 ssse3"
> +inline_sse4_deps="inline_ssse3 sse4"
> +inline_sse42_deps="inline_sse4 sse42"
> +inline_avx_deps="inline_sse42 avx"
> +inline_fma4_deps="inline_avx fma4"
> +
> +yasm_amd3dnow_deps="yasm_mmx amd3dnow"
> +yasm_amd3dnowext_deps="yasm_amd3dnow amd3dnowext"
> +yasm_mmx_deps="yasm mmx"
> +yasm_mmxext_deps="yasm_mmx mmxext"
> +yasm_sse_deps="yasm_mmxext sse"
> +yasm_sse2_deps="yasm_sse sse2"
> +yasm_sse3_deps="yasm_sse2 sse3"
> +yasm_ssse3_deps="yasm_sse3 ssse3"
> +yasm_sse4_deps="yasm_ssse3 sse4"
> +yasm_sse42_deps="yasm_sse4 sse42"
> +yasm_avx_deps="yasm_sse42 avx"
> +yasm_fma4_deps="yasm_avx fma4"
for ext in $ARCH_EXT_LIST_X86; do
eval dep=\$${ext}_deps
eval inline_${ext}_deps='"inline_$dep $ext"'
eval yasm_${ext}_deps='"yasm_$dep $ext"'
done
Or something like that.
> aligned_stack_if_any="ppc x86"
> fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64"
> fast_clz_if_any="alpha armv5te avr32 mips ppc x86"
> @@ -1863,7 +1917,10 @@ for n in $COMPONENT_LIST; do
> eval ${n}_if_any="\$$v"
> done
>
> -enable $ARCH_EXT_LIST $ALL_TESTS
> +enable $ARCH_EXT_LIST \
> + $ARCH_EXT_LIST_X86_INLINE \
> + $ARCH_EXT_LIST_X86_YASM \
> + $ALL_TESTS \
This will get out of hand quickly if we want to do something similar for
other architectures. Could the same thing be achieved by in the loop I
propose above adding this line:
eval ${ext}_suggest='"inline_$ext yasm_$ext"'
For future expansion, we might also want to use something more generic
than yasm in these variable names.
> die_unknown(){
> echo "Unknown option \"$1\"."
> @@ -2933,8 +2990,8 @@ EOF
> check_inline_asm xmm_clobbers '"":::"%xmm0"'
>
> # check whether binutils is new enough to compile SSSE3/MMXEXT
> - enabled ssse3 && check_inline_asm ssse3 '"pabsw %xmm0, %xmm0"'
> - enabled mmxext && check_inline_asm mmxext '"pmaxub %mm0, %mm1"'
> + check_inline_asm inline_ssse3 '"pabsw %xmm0, %xmm0"'
> + check_inline_asm inline_mmxext '"pmaxub %mm0, %mm1"'
>
> if ! disabled_any asm mmx yasm; then
> if check_cmd $yasmexe --version; then
> @@ -2955,8 +3012,8 @@ EOF
>
> check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
> die "yasm not found, use --disable-yasm for a crippled build"
> - check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
> - check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4
> + check_yasm "vextractf128 xmm0, ymm0, 0" || disable yasm_avx
> + check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable yasm_fma4
> check_yasm "CPU amdnop" && enable cpunop
> fi
These two hunks should be fine.
> @@ -3352,14 +3409,15 @@ fi
>
> enabled_any $THREADS_LIST && enable threads
>
> +enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; }
Moving that line is probably OK.
> check_deps $CONFIG_LIST \
> $CONFIG_EXTRA \
> $HAVE_LIST \
> + $HAVE_EXTRA \
> $ALL_COMPONENTS \
> $ALL_TESTS \
>
> -enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; }
> -
> ! enabled_any memalign posix_memalign aligned_malloc &&
> enabled_any $need_memalign && enable memalign_hack
>
> @@ -3606,7 +3664,10 @@ if enabled yasm; then
> fi
>
> print_config ARCH_ "$config_files" $ARCH_LIST
> -print_config HAVE_ "$config_files" $HAVE_LIST
> +
> +print_config HAVE_ "$config_files" $HAVE_LIST \
> + $HAVE_EXTRA \
> +
> print_config CONFIG_ "$config_files" $CONFIG_LIST \
> $CONFIG_EXTRA \
> $ALL_COMPONENTS \
See above about HAVE_EXTRA.
> diff --git a/libavfilter/x86/gradfun.c b/libavfilter/x86/gradfun.c
> index 01429ca..3ec1a2d 100644
> --- a/libavfilter/x86/gradfun.c
> +++ b/libavfilter/x86/gradfun.c
> @@ -24,12 +24,10 @@
> #include "libavutil/x86/asm.h"
> #include "libavfilter/gradfun.h"
>
> -#if HAVE_INLINE_ASM
Why do you remove that line?
> DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] =
> {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F};
> DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] =
> {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
>
> -#if HAVE_MMXEXT
> +#if HAVE_INLINE_MMXEXT
> static void gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t
> *dc, int width, int thresh, const uint16_t *dithers)
> {
> intptr_t x;
> @@ -77,7 +75,7 @@ static void gradfun_filter_line_mmx2(uint8_t *dst, uint8_t
> *src, uint16_t *dc, i
> }
> #endif
>
> -#if HAVE_SSSE3
> +#if HAVE_INLINE_SSSE3
> static void gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t
> *dc, int width, int thresh, const uint16_t *dithers)
> {
> intptr_t x;
> @@ -122,9 +120,9 @@ static void gradfun_filter_line_ssse3(uint8_t *dst,
> uint8_t *src, uint16_t *dc,
> :"memory"
> );
> }
> -#endif // HAVE_SSSE3
> +#endif /* HAVE_INLINE_SSSE3 */
>
> -#if HAVE_SSE2
> +#if HAVE_INLINE_SSE2
> static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t
> *buf1, uint8_t *src, int src_linesize, int width)
> {
> #define BLURV(load)\
> @@ -165,26 +163,16 @@ static void gradfun_blur_line_sse2(uint16_t *dc,
> uint16_t *buf, uint16_t *buf1,
> BLURV("movdqa");
> }
> }
> -#endif /* HAVE_SSE2 */
> -
> -#endif /* HAVE_INLINE_ASM */
> +#endif /* HAVE_INLINE_SSE2 */
>
> av_cold void ff_gradfun_init_x86(GradFunContext *gf)
> {
> int cpu_flags = av_get_cpu_flags();
>
> -#if HAVE_INLINE_ASM
> -#if HAVE_MMXEXT
> - if (cpu_flags & AV_CPU_FLAG_MMXEXT)
> + if (cpu_flags & AV_CPU_FLAG_MMXEXT && HAVE_INLINE_MMXEXT)
> gf->filter_line = gradfun_filter_line_mmx2;
This will fail if HAVE_INLINE_MMXEXT == 0.
> -#endif
> -#if HAVE_SSSE3
> - if (cpu_flags & AV_CPU_FLAG_SSSE3)
> + if (cpu_flags & AV_CPU_FLAG_SSSE3 && HAVE_INLINE_SSSE3)
> gf->filter_line = gradfun_filter_line_ssse3;
> -#endif
> -#if HAVE_SSE2
> - if (cpu_flags & AV_CPU_FLAG_SSE2)
> + if (cpu_flags & AV_CPU_FLAG_SSE2 && HAVE_INLINE_SSE2)
> gf->blur_line = gradfun_blur_line_sse2;
> -#endif
> -#endif /* HAVE_INLINE_ASM */
> }
[...]
> diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
> index d259a36..5c60e1d 100644
> --- a/libavutil/x86/float_dsp_init.c
> +++ b/libavutil/x86/float_dsp_init.c
> @@ -33,16 +33,14 @@ extern void ff_vector_fmac_scalar_avx(float *dst, const
> float *src, float mul,
>
> void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
> {
> -#if HAVE_YASM
> int mm_flags = av_get_cpu_flags();
>
> - if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
> + if (mm_flags & AV_CPU_FLAG_SSE && HAVE_YASM_SSE) {
> fdsp->vector_fmul = ff_vector_fmul_sse;
> fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
> }
> - if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) {
> + if (mm_flags & AV_CPU_FLAG_AVX && HAVE_YASM_AVX) {
> fdsp->vector_fmul = ff_vector_fmul_avx;
> fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
> }
> -#endif
> }
Why do you remove the #if HAVE_YASM? I imagine it's there for a reason.
--
Måns Rullgård
[email protected]
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel