[Libav-user] Trying to decode, filter and resample audio, but I get more samples than expected

Nicholas Stafie Mon, 27 Jun 2016 14:33:14 -0700

As the subject line says, I'm trying to decode, filter and resample audio. The 
resampled s16le output would then be used later on.


My problem is: the output I get contains the full song, correctly resampled, 
but it doesn't stop there and keeps outputting about a minute or so of extra 
frames. Those extra frames, when played back, sound like a sped-up version of 
the song played in reverse.

Attached is a file with my code, and there will be a direct link to download an 
audio file for convenience, but this happened with any file I tried no matter 
the format: http://freepd.com/Electronic/Fall%20Falling.mp3

Compile the file with "gcc main.c -lavformat -lavcodec -lavutil -lavfilter 
-lswresample -o decoder" and run it as "./decoder input-file.mp3 > 
output.bin"; the program writes the PCM frames it decodes to stdout.

This code is written with version 2.8.6 in mind, because that is what's 
available where I have to host this, but I've tried it on my machine with git 
version N-80780-gd693392 and I get the same result.

I've tried everything I could think of, but I'm not sure what I did wrong. 
Thanks in advance for any help!

#include <libavformat/avformat.h>
#include <libavutil/samplefmt.h>
#include <libavutil/opt.h>
#include <libavfilter/avfilter.h>
#include <libswresample/swresample.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <string.h>
#include <stdio.h>

/*
 * Turns a libav error code into printable text.
 *
 * The text is stored in a function-local static buffer, so the returned
 * pointer stays valid after the call but its contents are overwritten by the
 * next call (do not use two results in the same expression).
 *
 * err: error code to describe.
 * Returns: pointer to the NUL-terminated description.
 */
const char* get_str_err(const int err) {
    static char message[1024];
    const int status = av_strerror(err, message, sizeof message);

    /* Any non-zero status from av_strerror is treated as "no description". */
    if (status != 0) {
        strcpy(message, "Couldn't get a proper error!");
    }

    return message;
}

int main(int argc, char **argv) {
    int err;

    av_register_all();
    avfilter_register_all();

    const char *filter_str = "anull";

    // setup format context
    AVFormatContext *fctx = NULL;
    if ((err = avformat_open_input(&fctx, argv[1], NULL, NULL)) < 0) {
        fprintf(stderr, "Couldn't open input: %s\n", get_str_err(err));
        return err;
    }
    if ((err = avformat_find_stream_info(fctx, NULL)) < 0) {
        fprintf(stderr, "Couldn't find stream info: %s\n", get_str_err(err));
        return err;
    }

    // find codec and stream ID
    AVCodec *decoder = NULL;
    int stream_id = av_find_best_stream(fctx, AVMEDIA_TYPE_AUDIO, -1, -1, &decoder, 0);
    if (stream_id < 0) {
        err = stream_id;
        fprintf(stderr, "Couldn't find best stream: %s\n", get_str_err(err));
        return err;
    }
    AVStream *stream = fctx->streams[stream_id];
    AVCodecContext *codec_ctx = stream->codec;

    // setup codec context
    if ((err = avcodec_open2(codec_ctx, decoder, NULL)) < 0) {
        fprintf(stderr, "Couldn't open codec: %s\n", get_str_err(err));
        return err;
    }

    // setup resampler
    int64_t dst_channel_layout = av_get_default_channel_layout(codec_ctx->channels);
    enum AVSampleFormat dst_sample_fmt = AV_SAMPLE_FMT_S16;
    int dst_sample_rate = 48000;

    int64_t src_channel_layout = codec_ctx->channel_layout;
    enum AVSampleFormat src_sample_fmt = codec_ctx->sample_fmt;
    int src_sample_rate = codec_ctx->sample_rate;

    SwrContext *resampler = swr_alloc_set_opts(NULL,
                                               dst_channel_layout,
                                               dst_sample_fmt,
                                               dst_sample_rate,
                                               src_channel_layout,
                                               src_sample_fmt,
                                               src_sample_rate,
                                               0,
                                               NULL);

    if ((err = swr_init(resampler)) < 0) {
        fprintf(stderr, "Couldn't init resampler: %s\n", get_str_err(err));
        return err;
    }

    // setup filter
    AVFilterGraph *filter = avfilter_graph_alloc();

    AVFilter *abuffer = avfilter_get_by_name("abuffer");
    AVFilter *abuffersink = avfilter_get_by_name("abuffersink");

    char args[512];
    sprintf (args, "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%"PRIx64,
                     codec_ctx->time_base.num, codec_ctx->time_base.den,
                     codec_ctx->sample_rate, av_get_sample_fmt_name(codec_ctx->sample_fmt),
                     codec_ctx->channel_layout);

    fprintf(stderr, "Args: %s\n", args);

    AVFilterContext *abuffer_ctx;
    AVFilterContext *abuffersink_ctx;

    err = avfilter_graph_create_filter(&abuffer_ctx,
                                       abuffer, "in", args, NULL, filter);
    if (err < 0) {
        fprintf(stderr, "Couldn't create filter for abuffer: %s\n", get_str_err(err));
        return err;
    }

    err = avfilter_graph_create_filter(&abuffersink_ctx,
                                       abuffersink, "out", "", NULL, filter);
    if (err < 0) {
        fprintf(stderr, "Couldn't create filter for abuffersink: %s\n", get_str_err(err));
        return err;
    }

    enum AVSampleFormat out_sample_fmts[] = { codec_ctx->sample_fmt, -1 };
    int64_t out_channel_layouts[] = { codec_ctx->channel_layout, -1 };
    int out_sample_rates[] = { codec_ctx->sample_rate, -1 };

    if ((err = av_opt_set_int_list(abuffersink_ctx, "sample_fmts", out_sample_fmts, -1, AV_OPT_SEARCH_CHILDREN)) < 0) {
        fprintf(stderr, "Couldn't set sample format: %s\n", get_str_err(err));
        return err;
    }

    if ((err = av_opt_set_int_list(abuffersink_ctx, "channel_layouts", out_channel_layouts, -1, AV_OPT_SEARCH_CHILDREN)) < 0) {
        fprintf(stderr, "Couldn't set channel layout: %s\n", get_str_err(err));
        return err;
    }

    if ((err = av_opt_set_int_list(abuffersink_ctx, "sample_rates", out_sample_rates, -1, AV_OPT_SEARCH_CHILDREN)) < 0) {
        fprintf(stderr, "Couldn't set sample rate: %s\n", get_str_err(err));
        return err;
    }

    AVFilterInOut *input = avfilter_inout_alloc();
    AVFilterInOut *output = avfilter_inout_alloc();

    output->name       = av_strdup("in");
    output->filter_ctx = abuffer_ctx;
    output->pad_idx    = 0;
    output->next       = NULL;

    input->name       = av_strdup("out");
    input->filter_ctx = abuffersink_ctx;
    input->pad_idx    = 0;
    input->next       = NULL;

    if ((err = avfilter_graph_parse_ptr(filter, filter_str, &input, &output, NULL)) < 0) {
        fprintf(stderr, "Couldn't parse graph: %s\n", get_str_err(err));
        return err;
    }

    if ((err = avfilter_graph_config(filter, NULL)) < 0) {
        fprintf(stderr, "Couldn't config graph: %s\n", get_str_err(err));
        return err;
    }

    // allocate frames
    AVFrame *decoded = av_frame_alloc();
    AVFrame *encoded = av_frame_alloc();
    AVPacket packet;
    int got_frame = 0;

    int total_samples = 0;

    while (1) {
        if ((err = av_read_frame(fctx, &packet)) < 0) {break;}
        av_packet_rescale_ts(&packet, fctx->streams[stream_id]->time_base,
                                      codec_ctx->time_base);
        if (packet.stream_index != stream_id) {continue;}
        int total_length = 0;

        int len = avcodec_decode_audio4(codec_ctx, decoded, &got_frame, &packet);
        if (len == 0) {break;} //nothing left

        if (got_frame) {
            av_frame_set_sample_rate(encoded, dst_sample_rate);
            if ((err = av_buffersrc_add_frame(abuffer_ctx, decoded)) < 0) {
                fprintf(stderr, "Couldn't add frame to buffersrc: %s\n", get_str_err(err));
                return err;
            }

            if ((err = av_buffersink_get_frame(abuffersink_ctx, decoded)) < 0) {
                fprintf(stderr, "Couldn't get frame from buffersink: %s\n", get_str_err(err));
                return err;
            }
            if (encoded->data[0] == NULL) {
                encoded->format = dst_sample_fmt;
                encoded->nb_samples = decoded->nb_samples;
                av_frame_set_channel_layout(encoded, dst_channel_layout);
                av_frame_get_buffer(encoded, 0);
            }

            if ((err = swr_convert_frame(resampler, encoded, decoded)) < 0) {
                fprintf(stderr, "Couldn't resample frame: %s\n", get_str_err(err));
                return err;
            }
            int data_size = av_samples_get_buffer_size(NULL, codec_ctx->channels, encoded->nb_samples, codec_ctx->sample_fmt, 1);
            fwrite(encoded->data[0], 1, data_size, stdout);
            total_samples += encoded->nb_samples;
        }
    }

    // flush filter and resampler
    if ((err = av_buffersrc_add_frame(abuffer_ctx, NULL)) < 0) {
        fprintf(stderr, "Couldn't add NULL frame to buffersrc: %s\n", get_str_err(err));
        return err;
    }

    while (1) {
        av_frame_set_sample_rate(encoded, dst_sample_rate);
        if ((err = av_buffersink_get_frame(abuffersink_ctx, decoded)) < 0) {
            if (err == 0xDFB9B0BB) {break;} else {
                fprintf(stderr, "Couldn't get frame from flushed buffersink: %s\n", get_str_err(err));
            } // EOF
        }
        if (encoded->data[0] == NULL) {
            encoded->format = dst_sample_fmt;
            encoded->nb_samples = decoded->nb_samples;
            av_frame_set_channel_layout(encoded, dst_channel_layout);
            av_frame_get_buffer(encoded, 0);
        }

        if ((err = swr_convert_frame(resampler, encoded, decoded)) < 0) {
            fprintf(stderr, "Couldn't resample flushed frame: %s\n", get_str_err(err));
            return err;
        }
        int data_size = av_samples_get_buffer_size(NULL, codec_ctx->channels, encoded->nb_samples, codec_ctx->sample_fmt, 1);
        fwrite(encoded->data[0], 1, data_size, stdout);
        total_samples += encoded->nb_samples;
    }

    while (swr_get_delay(resampler, 1) > 0) {
        av_frame_set_sample_rate(encoded, dst_sample_rate);
        if (encoded->data[0] == NULL) {
            encoded->format = dst_sample_fmt;
            encoded->nb_samples = decoded->nb_samples;
            av_frame_set_channel_layout(encoded, dst_channel_layout);
            av_frame_get_buffer(encoded, 0);
        }
        if ((err = swr_convert_frame(resampler, encoded, NULL)) < 0) {
            fprintf(stderr, "Couldn't get frame from flushed resampler: %s\n", get_str_err(err));
            return err;
        }
        int data_size = av_samples_get_buffer_size(NULL, codec_ctx->channels, encoded->nb_samples, codec_ctx->sample_fmt, 1);
        fwrite(encoded->data[0], 1, data_size, stdout);
        total_samples += encoded->nb_samples;
    }
    fprintf(stderr, "Samples: %i\n", total_samples*2); //times 2 because nb_samples is per channel, and we have 2

}

_______________________________________________
Libav-user mailing list
[email protected]
http://ffmpeg.org/mailman/listinfo/libav-user

[Libav-user] Trying to decode, filter and resample audio, but I get more samples than expected

Reply via email to