Re: [FFmpeg-devel] [PATCH] swresample: Add support for clipping float/double to -1.0..1.0 range

wm4 Wed, 04 Nov 2015 08:09:13 -0800

On Wed, 4 Nov 2015 16:15:36 +0100
Michael Niedermayer <mich...@niedermayer.cc> wrote:


> On Fri, Oct 23, 2015 at 07:18:06PM +0200, wm4 wrote:
> > On Fri, 23 Oct 2015 18:08:00 +0200
> > Michael Niedermayer <michae...@gmx.at> wrote:
> >   
> > > From: Michael Niedermayer <mich...@niedermayer.cc>
> > > 
> > > Signed-off-by: Michael Niedermayer <mich...@niedermayer.cc>
> > > ---
> > >  libswresample/aarch64/audio_convert_init.c |    8 ++++-
> > >  libswresample/arm/audio_convert_init.c     |    8 ++++-
> > >  libswresample/audioconvert.c               |   44 
> > > ++++++++++++++++++++++++++--
> > >  libswresample/options.c                    |    1 +
> > >  libswresample/swresample.c                 |    6 ++--
> > >  libswresample/swresample.h                 |    1 +
> > >  libswresample/swresample_internal.h        |    6 ++--
> > >  libswresample/x86/audio_convert_init.c     |    8 ++++-
> > >  8 files changed, 71 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/libswresample/aarch64/audio_convert_init.c 
> > > b/libswresample/aarch64/audio_convert_init.c
> > > index 60e24ad..dedb1aa 100644
> > > --- a/libswresample/aarch64/audio_convert_init.c
> > > +++ b/libswresample/aarch64/audio_convert_init.c
> > > @@ -48,12 +48,18 @@ static void conv_fltp_to_s16_nch_neon(uint8_t **dst, 
> > > const uint8_t **src, int le
> > >  av_cold void swri_audio_convert_init_aarch64(struct AudioConvert *ac,
> > >                                         enum AVSampleFormat out_fmt,
> > >                                         enum AVSampleFormat in_fmt,
> > > -                                       int channels)
> > > +                                       int channels, int flags)
> > >  {
> > >      int cpu_flags = av_get_cpu_flags();
> > >  
> > >      ac->simd_f= NULL;
> > >  
> > > +    if (   (flags & SWR_FLAG_CLIP)
> > > +        && av_get_packed_sample_fmt(in_fmt) == AV_SAMPLE_FMT_FLT
> > > +        && av_get_packed_sample_fmt(out_fmt) == AV_SAMPLE_FMT_FLT) {
> > > +        return;
> > > +    }
> > > +
> > >      if (have_neon(cpu_flags)) {
> > >          if(out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLT 
> > > || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)
> > >              ac->simd_f = conv_flt_to_s16_neon;
> > > diff --git a/libswresample/arm/audio_convert_init.c 
> > > b/libswresample/arm/audio_convert_init.c
> > > index ec9e62e..f39978d 100644
> > > --- a/libswresample/arm/audio_convert_init.c
> > > +++ b/libswresample/arm/audio_convert_init.c
> > > @@ -48,12 +48,18 @@ static void conv_fltp_to_s16_nch_neon(uint8_t **dst, 
> > > const uint8_t **src, int le
> > >  av_cold void swri_audio_convert_init_arm(struct AudioConvert *ac,
> > >                                         enum AVSampleFormat out_fmt,
> > >                                         enum AVSampleFormat in_fmt,
> > > -                                       int channels)
> > > +                                       int channels, int flags)
> > >  {
> > >      int cpu_flags = av_get_cpu_flags();
> > >  
> > >      ac->simd_f= NULL;
> > >  
> > > +    if (   (flags & SWR_FLAG_CLIP)
> > > +        && av_get_packed_sample_fmt(in_fmt) == AV_SAMPLE_FMT_FLT
> > > +        && av_get_packed_sample_fmt(out_fmt) == AV_SAMPLE_FMT_FLT) {
> > > +        return;
> > > +    }
> > > +
> > >      if (have_neon(cpu_flags)) {
> > >          if(out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLT 
> > > || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)
> > >              ac->simd_f = conv_flt_to_s16_neon;
> > > diff --git a/libswresample/audioconvert.c b/libswresample/audioconvert.c
> > > index 58b0bf3..dc6734a 100644
> > > --- a/libswresample/audioconvert.c
> > > +++ b/libswresample/audioconvert.c
> > > @@ -77,6 +77,27 @@ CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, 
> > > AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*
> > >  CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_DBL, *(const 
> > > double*)pi)
> > >  CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, *(const 
> > > double*)pi)
> > >  
> > > +#define CONV_FUNC_NAME_CLIP(dst_fmt, src_fmt) conv_ ## src_fmt ## _to_ 
> > > ## dst_fmt ## _clip
> > > +#define CONV_FUNC_CLIP(ofmt, otype, ifmt, expr)\
> > > +static void CONV_FUNC_NAME_CLIP(ofmt, ifmt)(uint8_t *po, const uint8_t 
> > > *pi, int is, int os, uint8_t *end)\
> > > +{\
> > > +    uint8_t *end2 = end - 3*os;\
> > > +    while(po < end2){\
> > > +        *(otype*)po = expr; pi += is; po += os;\
> > > +        *(otype*)po = expr; pi += is; po += os;\
> > > +        *(otype*)po = expr; pi += is; po += os;\
> > > +        *(otype*)po = expr; pi += is; po += os;\
> > > +    }\
> > > +    while(po < end){\
> > > +        *(otype*)po = expr; pi += is; po += os;\
> > > +    }\
> > > +}
> > > +
> > > +CONV_FUNC_CLIP(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_FLT, 
> > > av_clipf(*(const  float*)pi, -1.0, 1.0))
> > > +CONV_FUNC_CLIP(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, 
> > > av_clipf(*(const  float*)pi, -1.0, 1.0))
> > > +CONV_FUNC_CLIP(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_DBL, 
> > > av_clipf(*(const double*)pi, -1.0, 1.0))
> > > +CONV_FUNC_CLIP(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, 
> > > av_clipd(*(const double*)pi, -1.0, 1.0))
> > > +
> > >  #define FMT_PAIR_FUNC(out, in) [(out) + AV_SAMPLE_FMT_NB*(in)] = 
> > > CONV_FUNC_NAME(out, in)
> > >  
> > >  static conv_func_type * const 
> > > fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB*AV_SAMPLE_FMT_NB] = {
> > > @@ -107,6 +128,15 @@ static conv_func_type * const 
> > > fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB*AV_SAM
> > >      FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBL),
> > >  };
> > >  
> > > +#define FMT_PAIR_FUNC_CLIP(out, in) [(out) + AV_SAMPLE_FMT_NB*(in)] = 
> > > CONV_FUNC_NAME_CLIP(out, in)
> > > +
> > > +static conv_func_type * const 
> > > fmt_pair_to_conv_functions_clip[AV_SAMPLE_FMT_NB*AV_SAMPLE_FMT_NB] = {
> > > +    FMT_PAIR_FUNC_CLIP(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLT),
> > > +    FMT_PAIR_FUNC_CLIP(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_FLT),
> > > +    FMT_PAIR_FUNC_CLIP(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_DBL),
> > > +    FMT_PAIR_FUNC_CLIP(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBL),
> > > +};
> > > +
> > >  static void cpy1(uint8_t **dst, const uint8_t **src, int len){
> > >      memcpy(*dst, *src, len);
> > >  }
> > > @@ -154,9 +184,17 @@ AudioConvert *swri_audio_convert_alloc(enum 
> > > AVSampleFormat out_fmt,
> > >          }
> > >      }
> > >  
> > > -    if(HAVE_YASM && HAVE_MMX) swri_audio_convert_init_x86(ctx, out_fmt, 
> > > in_fmt, channels);
> > > -    if(ARCH_ARM)              swri_audio_convert_init_arm(ctx, out_fmt, 
> > > in_fmt, channels);
> > > -    if(ARCH_AARCH64)          swri_audio_convert_init_aarch64(ctx, 
> > > out_fmt, in_fmt, channels);
> > > +    if (flags & SWR_FLAG_CLIP) {
> > > +        conv_func_type *f2 = 
> > > fmt_pair_to_conv_functions_clip[av_get_packed_sample_fmt(out_fmt) + 
> > > AV_SAMPLE_FMT_NB*av_get_packed_sample_fmt(in_fmt)];
> > > +        if (f2) {
> > > +            f = f2;
> > > +            ctx->simd_f = NULL;
> > > +        }
> > > +    }
> > > +
> > > +    if(HAVE_YASM && HAVE_MMX) swri_audio_convert_init_x86(ctx, out_fmt, 
> > > in_fmt, channels, flags);
> > > +    if(ARCH_ARM)              swri_audio_convert_init_arm(ctx, out_fmt, 
> > > in_fmt, channels, flags);
> > > +    if(ARCH_AARCH64)          swri_audio_convert_init_aarch64(ctx, 
> > > out_fmt, in_fmt, channels, flags);
> > >  
> > >      return ctx;
> > >  }
> > > diff --git a/libswresample/options.c b/libswresample/options.c
> > > index 0bcb102..bb68158 100644
> > > --- a/libswresample/options.c
> > > +++ b/libswresample/options.c
> > > @@ -67,6 +67,7 @@ static const AVOption options[]={
> > >  {"flags"                , "set flags"                   , OFFSET(flags   
> > >        ), AV_OPT_TYPE_FLAGS, {.i64=0                     }, 0      , 
> > > UINT_MAX  , PARAM, "flags"},
> > >  {"swr_flags"            , "set flags"                   , OFFSET(flags   
> > >        ), AV_OPT_TYPE_FLAGS, {.i64=0                     }, 0      , 
> > > UINT_MAX  , PARAM, "flags"},
> > >  {"res"                  , "force resampling"            , 0              
> > >         , AV_OPT_TYPE_CONST, {.i64=SWR_FLAG_RESAMPLE     }, INT_MIN, 
> > > INT_MAX   , PARAM, "flags"},
> > > +{"clip"                 , "clip float/double to -1.0..1.0", 0            
> > >         , AV_OPT_TYPE_CONST, {.i64=SWR_FLAG_CLIP         }, INT_MIN, 
> > > INT_MAX   , PARAM, "flags"},
> > >  
> > >  {"dither_scale"         , "set dither scale"            , 
> > > OFFSET(dither.scale   ), AV_OPT_TYPE_FLOAT, {.dbl=1                     
> > > }, 0      , INT_MAX   , PARAM},
> > >  
> > > diff --git a/libswresample/swresample.c b/libswresample/swresample.c
> > > index 8e23899..029b85e 100644
> > > --- a/libswresample/swresample.c
> > > +++ b/libswresample/swresample.c
> > > @@ -324,14 +324,14 @@ av_assert0(s->out.ch_count);
> > >  
> > >      if(!s->resample && !s->rematrix && !s->channel_map && 
> > > !s->dither.method){
> > >          s->full_convert = swri_audio_convert_alloc(s->out_sample_fmt,
> > > -                                                   s-> in_sample_fmt, 
> > > s-> in.ch_count, NULL, 0);
> > > +                                                   s-> in_sample_fmt, 
> > > s-> in.ch_count, NULL, s->flags & SWR_FLAG_CLIP);
> > >          return 0;
> > >      }
> > >  
> > >      s->in_convert = swri_audio_convert_alloc(s->int_sample_fmt,
> > >                                               s-> in_sample_fmt, 
> > > s->used_ch_count, s->channel_map, 0);
> > >      s->out_convert= swri_audio_convert_alloc(s->out_sample_fmt,
> > > -                                             s->int_sample_fmt, 
> > > s->out.ch_count, NULL, 0);
> > > +                                             s->int_sample_fmt, 
> > > s->out.ch_count, NULL, s->flags & SWR_FLAG_CLIP);
> > >  
> > >      if (!s->in_convert || !s->out_convert) {
> > >          ret = AVERROR(ENOMEM);
> > > @@ -606,6 +606,7 @@ static int swr_convert_internal(struct SwrContext *s, 
> > > AudioData *out, int out_co
> > >          preout= midbuf;
> > >  
> > >      if(s->int_sample_fmt == s->out_sample_fmt && s->out.planar
> > > +       && !((s->flags & SWR_FLAG_CLIP) && (s->out_sample_fmt == 
> > > AV_SAMPLE_FMT_FLT || s->out_sample_fmt == AV_SAMPLE_FMT_DBL))
> > >         && !(s->out_sample_fmt==AV_SAMPLE_FMT_S32P && 
> > > (s->dither.output_sample_bits&31))){
> > >          if(preout==in){
> > >              out_count= FFMIN(out_count, in_count); //TODO check at the 
> > > end if this is needed or redundant
> > > @@ -685,6 +686,7 @@ static int swr_convert_internal(struct SwrContext *s, 
> > > AudioData *out, int out_co
> > >  //FIXME packed doesn't need more than 1 chan here!
> > >          swri_audio_convert(s->out_convert, out, conv_src, out_count);
> > >      }
> > > +
> > >      return out_count;
> > >  }
> > >  
> > > diff --git a/libswresample/swresample.h b/libswresample/swresample.h
> > > index 10eaebc..3f56758 100644
> > > --- a/libswresample/swresample.h
> > > +++ b/libswresample/swresample.h
> > > @@ -138,6 +138,7 @@
> > >   */
> > >  
> > >  #define SWR_FLAG_RESAMPLE 1 ///< Force resampling even if equal sample 
> > > rate
> > > +#define SWR_FLAG_CLIP 2     ///< Clip float/double output to -1.0..1.0
> > >  //TODO use int resample ?
> > >  //long term TODO can we enable this dynamically?
> > >  
> > > diff --git a/libswresample/swresample_internal.h 
> > > b/libswresample/swresample_internal.h
> > > index bf0cec7..ab1e853 100644
> > > --- a/libswresample/swresample_internal.h
> > > +++ b/libswresample/swresample_internal.h
> > > @@ -206,14 +206,14 @@ int swri_dither_init(SwrContext *s, enum 
> > > AVSampleFormat out_fmt, enum AVSampleFo
> > >  void swri_audio_convert_init_aarch64(struct AudioConvert *ac,
> > >                                   enum AVSampleFormat out_fmt,
> > >                                   enum AVSampleFormat in_fmt,
> > > -                                 int channels);
> > > +                                 int channels, int flags);
> > >  void swri_audio_convert_init_arm(struct AudioConvert *ac,
> > >                                   enum AVSampleFormat out_fmt,
> > >                                   enum AVSampleFormat in_fmt,
> > > -                                 int channels);
> > > +                                 int channels, int flags);
> > >  void swri_audio_convert_init_x86(struct AudioConvert *ac,
> > >                                   enum AVSampleFormat out_fmt,
> > >                                   enum AVSampleFormat in_fmt,
> > > -                                 int channels);
> > > +                                 int channels, int flags);
> > >  
> > >  #endif
> > > diff --git a/libswresample/x86/audio_convert_init.c 
> > > b/libswresample/x86/audio_convert_init.c
> > > index 5e5e91d..e831a06 100644
> > > --- a/libswresample/x86/audio_convert_init.c
> > > +++ b/libswresample/x86/audio_convert_init.c
> > > @@ -36,11 +36,17 @@ PROTO4(_unpack_6ch_)
> > >  av_cold void swri_audio_convert_init_x86(struct AudioConvert *ac,
> > >                                   enum AVSampleFormat out_fmt,
> > >                                   enum AVSampleFormat in_fmt,
> > > -                                 int channels){
> > > +                                 int channels, int flags){
> > >      int mm_flags = av_get_cpu_flags();
> > >  
> > >      ac->simd_f= NULL;
> > >  
> > > +    if (   (flags & SWR_FLAG_CLIP)
> > > +        && av_get_packed_sample_fmt(in_fmt) == AV_SAMPLE_FMT_FLT
> > > +        && av_get_packed_sample_fmt(out_fmt) == AV_SAMPLE_FMT_FLT) {
> > > +        return;
> > > +    }
> > > +
> > >  //FIXME add memcpy case
> > >  
> > >  #define MULTI_CAPS_FUNC(flag, cap) \  
> > 
> > Wouldn't it be easier to just run float clipping as a postprocessing
> > step, and not bother with e.g. double->float clipping functions?  
> 
> it should be more efficient to do only one pass, and the dbl/flt->int*
> functions already clip so it seemed that it should fit well in there
> it could be done as a separate pass instead, of course
> 
> I think the existing asm should be updated to support that clipping
> case, this would also avoid the ugly SWR_FLAG_CLIP + return checks.
> But that's beyond the scope of this patch.

I think in this case, simplicity should be preferred. But you know
swr's processing chain better.

I just want to point out that the double sample format is extremely
obscure, and probably not worth optimizing at all.
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Re: [FFmpeg-devel] [PATCH] swresample: Add support for clipping float/double to -1.0..1.0 range

Reply via email to