On Wed, 4 Nov 2015 16:15:36 +0100 Michael Niedermayer <mich...@niedermayer.cc> wrote:
> On Fri, Oct 23, 2015 at 07:18:06PM +0200, wm4 wrote: > > On Fri, 23 Oct 2015 18:08:00 +0200 > > Michael Niedermayer <michae...@gmx.at> wrote: > > > > > From: Michael Niedermayer <mich...@niedermayer.cc> > > > > > > Signed-off-by: Michael Niedermayer <mich...@niedermayer.cc> > > > --- > > > libswresample/aarch64/audio_convert_init.c | 8 ++++- > > > libswresample/arm/audio_convert_init.c | 8 ++++- > > > libswresample/audioconvert.c | 44 > > > ++++++++++++++++++++++++++-- > > > libswresample/options.c | 1 + > > > libswresample/swresample.c | 6 ++-- > > > libswresample/swresample.h | 1 + > > > libswresample/swresample_internal.h | 6 ++-- > > > libswresample/x86/audio_convert_init.c | 8 ++++- > > > 8 files changed, 71 insertions(+), 11 deletions(-) > > > > > > diff --git a/libswresample/aarch64/audio_convert_init.c > > > b/libswresample/aarch64/audio_convert_init.c > > > index 60e24ad..dedb1aa 100644 > > > --- a/libswresample/aarch64/audio_convert_init.c > > > +++ b/libswresample/aarch64/audio_convert_init.c > > > @@ -48,12 +48,18 @@ static void conv_fltp_to_s16_nch_neon(uint8_t **dst, > > > const uint8_t **src, int le > > > av_cold void swri_audio_convert_init_aarch64(struct AudioConvert *ac, > > > enum AVSampleFormat out_fmt, > > > enum AVSampleFormat in_fmt, > > > - int channels) > > > + int channels, int flags) > > > { > > > int cpu_flags = av_get_cpu_flags(); > > > > > > ac->simd_f= NULL; > > > > > > + if ( (flags & SWR_FLAG_CLIP) > > > + && av_get_packed_sample_fmt(in_fmt) == AV_SAMPLE_FMT_FLT > > > + && av_get_packed_sample_fmt(out_fmt) == AV_SAMPLE_FMT_FLT) { > > > + return; > > > + } > > > + > > > if (have_neon(cpu_flags)) { > > > if(out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLT > > > || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP) > > > ac->simd_f = conv_flt_to_s16_neon; > > > diff --git a/libswresample/arm/audio_convert_init.c > > > b/libswresample/arm/audio_convert_init.c > > > index ec9e62e..f39978d 100644 > > > --- 
a/libswresample/arm/audio_convert_init.c > > > +++ b/libswresample/arm/audio_convert_init.c > > > @@ -48,12 +48,18 @@ static void conv_fltp_to_s16_nch_neon(uint8_t **dst, > > > const uint8_t **src, int le > > > av_cold void swri_audio_convert_init_arm(struct AudioConvert *ac, > > > enum AVSampleFormat out_fmt, > > > enum AVSampleFormat in_fmt, > > > - int channels) > > > + int channels, int flags) > > > { > > > int cpu_flags = av_get_cpu_flags(); > > > > > > ac->simd_f= NULL; > > > > > > + if ( (flags & SWR_FLAG_CLIP) > > > + && av_get_packed_sample_fmt(in_fmt) == AV_SAMPLE_FMT_FLT > > > + && av_get_packed_sample_fmt(out_fmt) == AV_SAMPLE_FMT_FLT) { > > > + return; > > > + } > > > + > > > if (have_neon(cpu_flags)) { > > > if(out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLT > > > || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP) > > > ac->simd_f = conv_flt_to_s16_neon; > > > diff --git a/libswresample/audioconvert.c b/libswresample/audioconvert.c > > > index 58b0bf3..dc6734a 100644 > > > --- a/libswresample/audioconvert.c > > > +++ b/libswresample/audioconvert.c > > > @@ -77,6 +77,27 @@ CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, > > > AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(* > > > CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_DBL, *(const > > > double*)pi) > > > CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, *(const > > > double*)pi) > > > > > > +#define CONV_FUNC_NAME_CLIP(dst_fmt, src_fmt) conv_ ## src_fmt ## _to_ > > > ## dst_fmt ## _clip > > > +#define CONV_FUNC_CLIP(ofmt, otype, ifmt, expr)\ > > > +static void CONV_FUNC_NAME_CLIP(ofmt, ifmt)(uint8_t *po, const uint8_t > > > *pi, int is, int os, uint8_t *end)\ > > > +{\ > > > + uint8_t *end2 = end - 3*os;\ > > > + while(po < end2){\ > > > + *(otype*)po = expr; pi += is; po += os;\ > > > + *(otype*)po = expr; pi += is; po += os;\ > > > + *(otype*)po = expr; pi += is; po += os;\ > > > + *(otype*)po = expr; pi += is; po += os;\ > > > + }\ > > > + while(po < end){\ > > > + 
*(otype*)po = expr; pi += is; po += os;\ > > > + }\ > > > +} > > > + > > > +CONV_FUNC_CLIP(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_FLT, > > > av_clipf(*(const float*)pi, -1.0, 1.0)) > > > +CONV_FUNC_CLIP(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, > > > av_clipf(*(const float*)pi, -1.0, 1.0)) > > > +CONV_FUNC_CLIP(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_DBL, > > > av_clipf(*(const double*)pi, -1.0, 1.0)) > > > +CONV_FUNC_CLIP(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, > > > av_clipd(*(const double*)pi, -1.0, 1.0)) > > > + > > > #define FMT_PAIR_FUNC(out, in) [(out) + AV_SAMPLE_FMT_NB*(in)] = > > > CONV_FUNC_NAME(out, in) > > > > > > static conv_func_type * const > > > fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB*AV_SAMPLE_FMT_NB] = { > > > @@ -107,6 +128,15 @@ static conv_func_type * const > > > fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB*AV_SAM > > > FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBL), > > > }; > > > > > > +#define FMT_PAIR_FUNC_CLIP(out, in) [(out) + AV_SAMPLE_FMT_NB*(in)] = > > > CONV_FUNC_NAME_CLIP(out, in) > > > + > > > +static conv_func_type * const > > > fmt_pair_to_conv_functions_clip[AV_SAMPLE_FMT_NB*AV_SAMPLE_FMT_NB] = { > > > + FMT_PAIR_FUNC_CLIP(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLT), > > > + FMT_PAIR_FUNC_CLIP(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_FLT), > > > + FMT_PAIR_FUNC_CLIP(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_DBL), > > > + FMT_PAIR_FUNC_CLIP(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBL), > > > +}; > > > + > > > static void cpy1(uint8_t **dst, const uint8_t **src, int len){ > > > memcpy(*dst, *src, len); > > > } > > > @@ -154,9 +184,17 @@ AudioConvert *swri_audio_convert_alloc(enum > > > AVSampleFormat out_fmt, > > > } > > > } > > > > > > - if(HAVE_YASM && HAVE_MMX) swri_audio_convert_init_x86(ctx, out_fmt, > > > in_fmt, channels); > > > - if(ARCH_ARM) swri_audio_convert_init_arm(ctx, out_fmt, > > > in_fmt, channels); > > > - if(ARCH_AARCH64) swri_audio_convert_init_aarch64(ctx, > > > out_fmt, in_fmt, channels); > > > + if (flags & 
SWR_FLAG_CLIP) { > > > + conv_func_type *f2 = > > > fmt_pair_to_conv_functions_clip[av_get_packed_sample_fmt(out_fmt) + > > > AV_SAMPLE_FMT_NB*av_get_packed_sample_fmt(in_fmt)]; > > > + if (f2) { > > > + f = f2; > > > + ctx->simd_f = NULL; > > > + } > > > + } > > > + > > > + if(HAVE_YASM && HAVE_MMX) swri_audio_convert_init_x86(ctx, out_fmt, > > > in_fmt, channels, flags); > > > + if(ARCH_ARM) swri_audio_convert_init_arm(ctx, out_fmt, > > > in_fmt, channels, flags); > > > + if(ARCH_AARCH64) swri_audio_convert_init_aarch64(ctx, > > > out_fmt, in_fmt, channels, flags); > > > > > > return ctx; > > > } > > > diff --git a/libswresample/options.c b/libswresample/options.c > > > index 0bcb102..bb68158 100644 > > > --- a/libswresample/options.c > > > +++ b/libswresample/options.c > > > @@ -67,6 +67,7 @@ static const AVOption options[]={ > > > {"flags" , "set flags" , OFFSET(flags > > > ), AV_OPT_TYPE_FLAGS, {.i64=0 }, 0 , > > > UINT_MAX , PARAM, "flags"}, > > > {"swr_flags" , "set flags" , OFFSET(flags > > > ), AV_OPT_TYPE_FLAGS, {.i64=0 }, 0 , > > > UINT_MAX , PARAM, "flags"}, > > > {"res" , "force resampling" , 0 > > > , AV_OPT_TYPE_CONST, {.i64=SWR_FLAG_RESAMPLE }, INT_MIN, > > > INT_MAX , PARAM, "flags"}, > > > +{"clip" , "clip float/double to -1.0..1.0", 0 > > > , AV_OPT_TYPE_CONST, {.i64=SWR_FLAG_CLIP }, INT_MIN, > > > INT_MAX , PARAM, "flags"}, > > > > > > {"dither_scale" , "set dither scale" , > > > OFFSET(dither.scale ), AV_OPT_TYPE_FLOAT, {.dbl=1 > > > }, 0 , INT_MAX , PARAM}, > > > > > > diff --git a/libswresample/swresample.c b/libswresample/swresample.c > > > index 8e23899..029b85e 100644 > > > --- a/libswresample/swresample.c > > > +++ b/libswresample/swresample.c > > > @@ -324,14 +324,14 @@ av_assert0(s->out.ch_count); > > > > > > if(!s->resample && !s->rematrix && !s->channel_map && > > > !s->dither.method){ > > > s->full_convert = swri_audio_convert_alloc(s->out_sample_fmt, > > > - s-> in_sample_fmt, > > > s-> in.ch_count, NULL, 0); > > > + s-> 
in_sample_fmt, > > > s-> in.ch_count, NULL, s->flags & SWR_FLAG_CLIP); > > > return 0; > > > } > > > > > > s->in_convert = swri_audio_convert_alloc(s->int_sample_fmt, > > > s-> in_sample_fmt, > > > s->used_ch_count, s->channel_map, 0); > > > s->out_convert= swri_audio_convert_alloc(s->out_sample_fmt, > > > - s->int_sample_fmt, > > > s->out.ch_count, NULL, 0); > > > + s->int_sample_fmt, > > > s->out.ch_count, NULL, s->flags & SWR_FLAG_CLIP); > > > > > > if (!s->in_convert || !s->out_convert) { > > > ret = AVERROR(ENOMEM); > > > @@ -606,6 +606,7 @@ static int swr_convert_internal(struct SwrContext *s, > > > AudioData *out, int out_co > > > preout= midbuf; > > > > > > if(s->int_sample_fmt == s->out_sample_fmt && s->out.planar > > > + && !((s->flags & SWR_FLAG_CLIP) && (s->out_sample_fmt == > > > AV_SAMPLE_FMT_FLT || s->out_sample_fmt == AV_SAMPLE_FMT_DBL)) > > > && !(s->out_sample_fmt==AV_SAMPLE_FMT_S32P && > > > (s->dither.output_sample_bits&31))){ > > > if(preout==in){ > > > out_count= FFMIN(out_count, in_count); //TODO check at the > > > end if this is needed or redundant > > > @@ -685,6 +686,7 @@ static int swr_convert_internal(struct SwrContext *s, > > > AudioData *out, int out_co > > > //FIXME packed doesn't need more than 1 chan here! > > > swri_audio_convert(s->out_convert, out, conv_src, out_count); > > > } > > > + > > > return out_count; > > > } > > > > > > diff --git a/libswresample/swresample.h b/libswresample/swresample.h > > > index 10eaebc..3f56758 100644 > > > --- a/libswresample/swresample.h > > > +++ b/libswresample/swresample.h > > > @@ -138,6 +138,7 @@ > > > */ > > > > > > #define SWR_FLAG_RESAMPLE 1 ///< Force resampling even if equal sample > > > rate > > > +#define SWR_FLAG_CLIP 2 ///< Clip float/double output to -1.0..1.0 > > > //TODO use int resample ? > > > //long term TODO can we enable this dynamically? 
> > > > > > diff --git a/libswresample/swresample_internal.h > > > b/libswresample/swresample_internal.h > > > index bf0cec7..ab1e853 100644 > > > --- a/libswresample/swresample_internal.h > > > +++ b/libswresample/swresample_internal.h > > > @@ -206,14 +206,14 @@ int swri_dither_init(SwrContext *s, enum > > > AVSampleFormat out_fmt, enum AVSampleFo > > > void swri_audio_convert_init_aarch64(struct AudioConvert *ac, > > > enum AVSampleFormat out_fmt, > > > enum AVSampleFormat in_fmt, > > > - int channels); > > > + int channels, int flags); > > > void swri_audio_convert_init_arm(struct AudioConvert *ac, > > > enum AVSampleFormat out_fmt, > > > enum AVSampleFormat in_fmt, > > > - int channels); > > > + int channels, int flags); > > > void swri_audio_convert_init_x86(struct AudioConvert *ac, > > > enum AVSampleFormat out_fmt, > > > enum AVSampleFormat in_fmt, > > > - int channels); > > > + int channels, int flags); > > > > > > #endif > > > diff --git a/libswresample/x86/audio_convert_init.c > > > b/libswresample/x86/audio_convert_init.c > > > index 5e5e91d..e831a06 100644 > > > --- a/libswresample/x86/audio_convert_init.c > > > +++ b/libswresample/x86/audio_convert_init.c > > > @@ -36,11 +36,17 @@ PROTO4(_unpack_6ch_) > > > av_cold void swri_audio_convert_init_x86(struct AudioConvert *ac, > > > enum AVSampleFormat out_fmt, > > > enum AVSampleFormat in_fmt, > > > - int channels){ > > > + int channels, int flags){ > > > int mm_flags = av_get_cpu_flags(); > > > > > > ac->simd_f= NULL; > > > > > > + if ( (flags & SWR_FLAG_CLIP) > > > + && av_get_packed_sample_fmt(in_fmt) == AV_SAMPLE_FMT_FLT > > > + && av_get_packed_sample_fmt(out_fmt) == AV_SAMPLE_FMT_FLT) { > > > + return; > > > + } > > > + > > > //FIXME add memcpy case > > > > > > #define MULTI_CAPS_FUNC(flag, cap) \ > > > > Wouldn't it be easier to just run float clipping as a postprocessing > > step, and not bother with e.g. double->float clipping functions? 
> > it should be more efficient to do only one pass, and the dbl/flt->int* > functions already clip so it seemed that it should fit well in there > it could be done as a separate pass instead of course > > I think the existing asm should be updated to support that clipping > case, this would also avoid the ugly SWR_FLAG_CLIP + return checks. > But that's beyond the scope of this patch I think in this case, simplicity should be preferred. But you know swr's processing chain better. I just want to point out that the double sample format is extremely obscure, and probably not worth optimizing at all. _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel