Based on patches by Michael Niedermayer <[email protected]>.
---
 ffmpeg.c                        |   40 +++++++++++++++++++++++++++++++
 libavcodec/aacdec.c             |   25 +++++++++++++++----
 libavcodec/ac3dec.c             |   34 ++++++++++++++++++++------
 libavcodec/dca.c                |   34 +++++++++++++++++++++-----
 libavcodec/fmtconvert.c         |   20 ++++++++++++++++
 libavcodec/fmtconvert.h         |    9 +++++++
 libavcodec/vorbis_dec.c         |   18 ++++++++++++-
 libavcodec/x86/fmtconvert.asm   |   49 +++++++++++++++++++++++++++++++++++++++
 libavcodec/x86/fmtconvert_mmx.c |   12 +++++++++
 9 files changed, 218 insertions(+), 23 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 886d5da..9476bdf 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -543,6 +543,35 @@ static void choose_sample_fmt(AVStream *st, AVCodec *codec)
     }
 }
 
+/**
+ * Request float output from the decoder when the encoder can make use of it.
+ * This only applies to decoders that list AV_SAMPLE_FMT_FLT among their
+ * supported sample formats; a format that was already requested is kept.
+ * Ideally this will be removed in the future when decoders do not do format
+ * conversion and only output in their native format.
+ */
+static void update_sample_fmt(AVCodecContext *dec, AVCodec *dec_codec,
+                              AVCodecContext *enc)
+{
+    const enum AVSampleFormat *fmt;
+
+    /* nothing to do if a request was already made or the formats match */
+    if (dec->request_sample_fmt > AV_SAMPLE_FMT_NONE ||
+        enc->sample_fmt == dec->sample_fmt)
+        return;
+    if (!dec_codec || !dec_codec->sample_fmts)
+        return;
+    if (enc->sample_fmt != AV_SAMPLE_FMT_FLT &&
+        enc->sample_fmt != AV_SAMPLE_FMT_DBL &&
+        enc->sample_fmt != AV_SAMPLE_FMT_S32)
+        return;
+    fmt = dec_codec->sample_fmts;
+    while (*fmt != AV_SAMPLE_FMT_NONE && *fmt != AV_SAMPLE_FMT_FLT)
+        fmt++;
+    if (*fmt == AV_SAMPLE_FMT_FLT)
+        dec->request_sample_fmt = AV_SAMPLE_FMT_FLT;
+}
+
 static void choose_sample_rate(AVStream *st, AVCodec *codec)
 {
     if(codec && codec->supported_samplerates){
@@ -2287,6 +2316,17 @@ static int transcode(AVFormatContext **output_files,
                 ret = AVERROR(EINVAL);
                 goto dump_format;
             }
+
+            /* update requested sample format for the input stream based on the
+               corresponding output encoder sample format */
+            for (j = 0; j < nb_ostreams; j++) {
+                ost = ost_table[j];
+                if (i == ost->source_index) {
+                    update_sample_fmt(ist->st->codec, codec, ost->st->codec);
+                    break;
+                }
+            }
+
             if (avcodec_open(ist->st->codec, codec) < 0) {
                 snprintf(error, sizeof(error), "Error while opening decoder for input stream #%d.%d",
                         ist->file_index, ist->index);
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index c9761a1..85e2889 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -549,7 +549,10 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
             return -1;
     }
 
-    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+    if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT)
+        avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+    else
+        avctx->sample_fmt = AV_SAMPLE_FMT_S16;
 
     AAC_INIT_VLC_STATIC( 0, 304);
     AAC_INIT_VLC_STATIC( 1, 270);
@@ -574,7 +577,10 @@ static av_cold int aac_decode_init(AVCodecContext *avctx)
     // 60    - Required to scale values to the correct range [-32768,32767]
     //         for float to int16 conversion. (1 << (60 / 4)) == 32768
     ac->sf_scale  = 1. / -1024.;
-    ac->sf_offset = 60;
+    if (avctx->sample_fmt == AV_SAMPLE_FMT_S16)
+        ac->sf_offset = 60;
+    else
+        ac->sf_offset = 0;
 
     ff_aac_tableinit();
 
@@ -2166,7 +2172,8 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
         avctx->frame_size = samples;
     }
 
-    data_size_tmp = samples * avctx->channels * sizeof(int16_t);
+    data_size_tmp = samples * avctx->channels *
+                    (av_get_bits_per_sample_fmt(avctx->sample_fmt) / 8);
     if (*data_size < data_size_tmp) {
         av_log(avctx, AV_LOG_ERROR,
                "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
@@ -2175,8 +2182,14 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
     }
     *data_size = data_size_tmp;
 
-    if (samples)
+    if (samples) {
+        if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
+            ac->fmt_conv.float_interleave(data, (const float **)ac->output_data,
+                                          samples, avctx->channels);
+        } else {
         ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
+        }
+    }
 
     if (ac->output_configured)
         ac->output_configured = OC_LOCKED;
@@ -2494,7 +2507,7 @@ AVCodec ff_aac_decoder = {
     aac_decode_frame,
     .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
     .sample_fmts = (const enum AVSampleFormat[]) {
-        AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE
+        AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE
     },
     .channel_layouts = aac_channel_layout,
 };
@@ -2514,7 +2527,7 @@ AVCodec ff_aac_latm_decoder = {
     .decode = latm_decode_frame,
     .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Codec LATM syntax)"),
     .sample_fmts = (const enum AVSampleFormat[]) {
-        AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE
+        AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE
     },
     .channel_layouts = aac_channel_layout,
 };
diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c
index 015ebae..34c504a 100644
--- a/libavcodec/ac3dec.c
+++ b/libavcodec/ac3dec.c
@@ -188,9 +188,6 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
     ff_fmt_convert_init(&s->fmt_conv, avctx);
     av_lfg_init(&s->dith_state, 0);
 
-    /* set scale value for float to int16 conversion */
-    s->mul_bias = 32767.0f;
-
     /* allow downmixing to stereo or mono */
     if (avctx->channels > 0 && avctx->request_channels > 0 &&
             avctx->request_channels < avctx->channels &&
@@ -199,12 +196,20 @@ static av_cold int ac3_decode_init(AVCodecContext *avctx)
     }
     s->downmixed = 1;
 
+    /* set the output sample_fmt and the matching output scale value */
+    if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
+        avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+        s->mul_bias = 1.0f;
+    } else {
+        avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+        s->mul_bias = 32767.0f;
+    }
+
     /* allocate context input buffer */
         s->input_buffer = av_mallocz(AC3_FRAME_BUFFER_SIZE + FF_INPUT_BUFFER_PADDING_SIZE);
         if (!s->input_buffer)
             return AVERROR(ENOMEM);
 
-    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
     return 0;
 }
 
@@ -1299,7 +1304,8 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
     const uint8_t *buf = avpkt->data;
     int buf_size = avpkt->size;
     AC3DecodeContext *s = avctx->priv_data;
-    int16_t *out_samples = (int16_t *)data;
+    int16_t *out_s16 = (int16_t *)data;
+    float   *out_flt = (float *)data;
     int blk, ch, err;
     const uint8_t *channel_map;
     const float *output[AC3_MAX_CHANNELS];
@@ -1405,10 +1411,16 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
             av_log(avctx, AV_LOG_ERROR, "error decoding the audio block\n");
             err = 1;
         }
-        s->fmt_conv.float_to_int16_interleave(out_samples, output, 256, s->out_channels);
-        out_samples += 256 * s->out_channels;
+        if (avctx->sample_fmt == AV_SAMPLE_FMT_S16) {
+            s->fmt_conv.float_to_int16_interleave(out_s16, output, 256, s->out_channels);
+            out_s16 += 256 * s->out_channels;
+        } else {  // AV_SAMPLE_FMT_FLT
+            s->fmt_conv.float_interleave(out_flt, output, 256, s->out_channels);
+            out_flt += 256 * s->out_channels;
+        }
     }
-    *data_size = s->num_blocks * 256 * avctx->channels * sizeof (int16_t);
+    *data_size = s->num_blocks * 256 * s->out_channels *
+                 (av_get_bits_per_sample_fmt(avctx->sample_fmt) / 8);
     return FFMIN(buf_size, s->frame_size);
 }
 
@@ -1435,6 +1447,9 @@ AVCodec ff_ac3_decoder = {
     .close = ac3_decode_end,
     .decode = ac3_decode_frame,
     .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52A (AC-3)"),
+    .sample_fmts = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE
+    },
 };
 
 #if CONFIG_EAC3_DECODER
@@ -1447,5 +1462,8 @@ AVCodec ff_eac3_decoder = {
     .close = ac3_decode_end,
     .decode = ac3_decode_frame,
     .long_name = NULL_IF_CONFIG_SMALL("ATSC A/52B (AC-3, E-AC-3)"),
+    .sample_fmts = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE
+    },
 };
 #endif
diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index e3c6466..95ff17d 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -1626,7 +1626,9 @@ static int dca_decode_frame(AVCodecContext * avctx,
     int lfe_samples;
     int num_core_channels = 0;
     int i;
-    int16_t *samples = data;
+    int16_t *samples_s16 = data;
+    float   *samples_flt = data;
+    int out_size;
     DCAContext *s = avctx->priv_data;
     int channels;
     int core_ss_end;
@@ -1812,9 +1814,11 @@ static int dca_decode_frame(AVCodecContext * avctx,
         return -1;
     }
 
-    if (*data_size < (s->sample_blocks / 8) * 256 * sizeof(int16_t) * channels)
+    out_size = 256 / 8 * s->sample_blocks * channels *
+               (av_get_bits_per_sample_fmt(avctx->sample_fmt) / 8);
+    if (*data_size < out_size)
         return -1;
-    *data_size = 256 / 8 * s->sample_blocks * sizeof(int16_t) * channels;
+    *data_size = out_size;
 
     /* filter to get final output */
     for (i = 0; i < (s->sample_blocks / 8); i++) {
@@ -1833,8 +1837,16 @@ static int dca_decode_frame(AVCodecContext * avctx,
             }
         }
 
-        s->fmt_conv.float_to_int16_interleave(samples, s->samples_chanptr, 256, channels);
-        samples += 256 * channels;
+        if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) {
+            s->fmt_conv.float_interleave(samples_flt, s->samples_chanptr, 256,
+                                         channels);
+            samples_flt += 256 * channels;
+        } else {
+            s->fmt_conv.float_to_int16_interleave(samples_s16,
+                                                  s->samples_chanptr, 256,
+                                                  channels);
+            samples_s16 += 256 * channels;
+        }
     }
 
     /* update lfe history */
@@ -1870,9 +1882,14 @@ static av_cold int dca_decode_init(AVCodecContext * avctx)
 
     for (i = 0; i < DCA_PRIM_CHANNELS_MAX+1; i++)
         s->samples_chanptr[i] = s->samples + i * 256;
-    avctx->sample_fmt = AV_SAMPLE_FMT_S16;
 
-    s->scale_bias = 1.0;
+    if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
+        avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+        s->scale_bias = 1.0 / 32768.0;
+    } else {
+        avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+        s->scale_bias = 1.0;
+    }
 
     /* allow downmixing to stereo */
     if (avctx->channels > 0 && avctx->request_channels < avctx->channels &&
@@ -1909,5 +1926,8 @@ AVCodec ff_dca_decoder = {
     .close = dca_decode_end,
     .long_name = NULL_IF_CONFIG_SMALL("DCA (DTS Coherent Acoustics)"),
     .capabilities = CODEC_CAP_CHANNEL_CONF,
+    .sample_fmts = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE
+    },
     .profiles = NULL_IF_CONFIG_SMALL(profiles),
 };
diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c
index e970755..58fece7 100644
--- a/libavcodec/fmtconvert.c
+++ b/libavcodec/fmtconvert.c
@@ -56,11 +56,31 @@ static void float_to_int16_interleave_c(int16_t *dst, const float **src,
     }
 }
 
+void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
+                           int channels)
+{
+    unsigned int n;
+    int ch, pos;
+    if (channels == 1 && len < INT_MAX / sizeof(float)) {
+        memcpy(dst, src[0], len * sizeof(float)); /* mono: plain copy */
+    } else if (channels == 2) {                   /* stereo fast path */
+        for (n = 0; n < len; n++, dst += 2) {
+            dst[0] = src[0][n];
+            dst[1] = src[1][n];
+        }
+    } else {                                      /* any channel count */
+        for (ch = 0; ch < channels; ch++)
+            for (n = 0, pos = ch; n < len; n++, pos += channels)
+                dst[pos] = src[ch][n];
+    }
+}
+
 av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
 {
     c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
     c->float_to_int16             = float_to_int16_c;
     c->float_to_int16_interleave  = float_to_int16_interleave_c;
+    c->float_interleave           = ff_float_interleave_c;
 
     if (ARCH_ARM) ff_fmt_convert_init_arm(c, avctx);
     if (HAVE_ALTIVEC) ff_fmt_convert_init_altivec(c, avctx);
diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h
index e0afee4..d774113 100644
--- a/libavcodec/fmtconvert.h
+++ b/libavcodec/fmtconvert.h
@@ -68,8 +68,17 @@ typedef struct FmtConvertContext {
      */
     void (*float_to_int16_interleave)(int16_t *dst, const float **src,
                                       long len, int channels);
+
+    /**
+     * Convert multiple arrays of float to an array of interleaved float.
+     */
+    void (*float_interleave)(float *dst, const float **src, unsigned int len,
+                             int channels);
 } FmtConvertContext;
 
+void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
+                           int channels);
+
 void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx);
 
 void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx);
diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index 5fa7be1..9d9bed6 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -962,7 +962,13 @@ static av_cold int vorbis_decode_init(AVCodecContext *avccontext)
     dsputil_init(&vc->dsp, avccontext);
     ff_fmt_convert_init(&vc->fmt_conv, avccontext);
 
-    vc->scale_bias = 32768.0f;
+    if (avccontext->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
+        avccontext->sample_fmt  = AV_SAMPLE_FMT_FLT;
+        vc->scale_bias = 1.0f;
+    } else {
+        avccontext->sample_fmt  = AV_SAMPLE_FMT_S16;
+        vc->scale_bias = 32768.0f;
+    }
 
     if (!headers_len) {
         av_log(avccontext, AV_LOG_ERROR, "Extradata missing.\n");
@@ -1007,7 +1013,6 @@ static av_cold int vorbis_decode_init(AVCodecContext *avccontext)
     avccontext->channels    = vc->audio_channels;
     avccontext->sample_rate = vc->audio_samplerate;
     avccontext->frame_size  = FFMIN(vc->blocksize[0], vc->blocksize[1]) >> 2;
-    avccontext->sample_fmt  = AV_SAMPLE_FMT_S16;
 
     return 0 ;
 }
@@ -1635,9 +1640,15 @@ static int vorbis_decode_frame(AVCodecContext *avccontext,
                               len * ff_vorbis_channel_layout_offsets[vc->audio_channels - 1][i];
     }
 
+    if (avccontext->sample_fmt == AV_SAMPLE_FMT_FLT) {
+        vc->fmt_conv.float_interleave(data, channel_ptrs, len,
+                                      vc->audio_channels);
+        *data_size = len * sizeof(float) * vc->audio_channels;
+    } else {
     vc->fmt_conv.float_to_int16_interleave(data, channel_ptrs, len,
                                            vc->audio_channels);
     *data_size = len * 2 * vc->audio_channels;
+    }
 
     return buf_size ;
 }
@@ -1664,5 +1675,8 @@ AVCodec ff_vorbis_decoder = {
     vorbis_decode_frame,
     .long_name = NULL_IF_CONFIG_SMALL("Vorbis"),
     .channel_layouts = ff_vorbis_channel_layouts,
+    .sample_fmts = (const enum AVSampleFormat[]) {
+        AV_SAMPLE_FMT_FLT,AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE
+    },
 };
 
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index ddcbab4..5d77ee5 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -89,3 +89,52 @@ FLOAT_TO_INT16_INTERLEAVE6 3dnow
 %undef pswapd
 FLOAT_TO_INT16_INTERLEAVE6 3dn2
 %undef cvtps2pi
+
+;-----------------------------------------------------------------------------
+; void ff_float_interleave6_sse(float *dst, const float **src,
+;                               unsigned int len, int channels);
+;-----------------------------------------------------------------------------
+
+cglobal float_interleave6_sse, 2,7,4, dst, src, src1, src2, src3, src4, src5
+%ifdef ARCH_X86_64
+    %define lend r10d
+    mov     lend, r2d                ; copy len out of r2 (presumably because r2 is reused as src1 - confirm)
+%else
+    %define lend dword r2m
+%endif
+    mov      src1q, [srcq+1*gprsize] ; src1q = src[1]
+    mov      src2q, [srcq+2*gprsize] ; src2q = src[2]
+    mov      src3q, [srcq+3*gprsize] ; src3q = src[3]
+    mov      src4q, [srcq+4*gprsize] ; src4q = src[4]
+    mov      src5q, [srcq+5*gprsize] ; src5q = src[5]
+    mov       srcq, [srcq]           ; srcq  = src[0]
+    sub      src1q, srcq             ; turn src[1..5] into byte offsets from src[0]
+    sub      src2q, srcq
+    sub      src3q, srcq
+    sub      src4q, srcq
+    sub      src5q, srcq
+.loop:                               ; a..f = channels 0..5, digit = sample; lanes listed low to high
+    movlps    xmm0, [srcq]           ; xmm0 = a0 a1 -- --
+    movhps    xmm0, [srcq+src3q]     ; xmm0 = a0 a1 d0 d1
+    movlps    xmm1, [srcq+src1q]     ; xmm1 = b0 b1 -- --
+    movhps    xmm1, [srcq+src4q]     ; xmm1 = b0 b1 e0 e1
+    movlps    xmm2, [srcq+src2q]     ; xmm2 = c0 c1 -- --
+    movhps    xmm2, [srcq+src5q]     ; xmm2 = c0 c1 f0 f1
+    movlhps   xmm3, xmm0             ; xmm3 = -- -- a0 a1
+    movhlps   xmm3, xmm0             ; xmm3 = d0 d1 a0 a1
+    unpcklps  xmm0, xmm1             ; xmm0 = a0 b0 a1 b1
+    unpckhps  xmm1, xmm2             ; xmm1 = e0 f0 e1 f1
+    unpcklps  xmm2, xmm3             ; xmm2 = c0 d0 c1 d1
+    movlhps   xmm3, xmm0             ; xmm3 = d0 d1 a0 b0
+    movhlps   xmm3, xmm0             ; xmm3 = a1 b1 a0 b0
+    movlhps   xmm0, xmm2             ; xmm0 = a0 b0 c0 d0
+    shufps    xmm2, xmm1, 0xee       ; xmm2 = c1 d1 e1 f1
+    movlhps   xmm1, xmm3             ; xmm1 = e0 f0 a1 b1
+    movaps  [dstq   ], xmm0          ; movaps requires dstq to be 16-byte aligned
+    movaps  [dstq+16], xmm1
+    movaps  [dstq+32], xmm2
+    add srcq, 8                      ; advance 2 floats in every input channel
+    add dstq, 48                     ; advance 12 floats (2 samples x 6 channels) in the output
+    sub lend, 2                      ; len is consumed two samples per iteration
+    jg .loop
+    REP_RET
diff --git a/libavcodec/x86/fmtconvert_mmx.c b/libavcodec/x86/fmtconvert_mmx.c
index 847bd80..4945d81 100644
--- a/libavcodec/x86/fmtconvert_mmx.c
+++ b/libavcodec/x86/fmtconvert_mmx.c
@@ -235,6 +235,17 @@ static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long
         float_to_int16_interleave_3dnow(dst, src, len, channels);
 }
 
+void ff_float_interleave6_sse(float *dst, const float **src, unsigned int len, int channels);
+
+/* Use the 6-channel SSE routine when it applies, the C version otherwise. */
+static void float_interleave_sse(float *dst, const float **src, unsigned int len, int channels)
+{
+    if (channels != 6)
+        ff_float_interleave_c(dst, src, len, channels);
+    else
+        ff_float_interleave6_sse(dst, src, len, channels);
+}
+
 void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
 {
     int mm_flags = av_get_cpu_flags();
@@ -256,6 +267,7 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
             c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
             c->float_to_int16 = float_to_int16_sse;
             c->float_to_int16_interleave = float_to_int16_interleave_sse;
+            c->float_interleave = float_interleave_sse;
         }
         if(mm_flags & AV_CPU_FLAG_SSE2){
             c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2;
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to