Hi all,

When stitching four 1080p videos, each input is decoded on its own thread, and a
single thread writes the decoded frames to the respective buffersrc filters.
During processing, memory usage grows rapidly (it stays normal when only
decoding is performed, without calling av_buffersrc_write_frame and
av_buffersink_get_frame). The memory is also not released after calling
av_buffersink_get_frame, even when av_frame_unref is invoked. It looks as if
frames are being buffered somewhere in the filter chain and never released.
How can this issue be resolved?
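
Condensed, the feed/drain pattern in the attached program boils down to the
sketch below (pop_from_queue is just shorthand for the mutex-protected queue
access in the full code, and done stands for the EOF bookkeeping):

    AVFrame *out = av_frame_alloc();
    while (!done) {
        for (int i = 0; i < NUM_VIDEOS; i++) {
            AVFrame *f = pop_from_queue(i);            // decoded NV12 frame (NULL at EOF)
            av_buffersrc_write_frame(src_ctx[i], f);   // the graph takes its own reference
            av_frame_free(&f);                         // drops only our reference
        }
        while (av_buffersink_get_frame(buffersink_ctx, out) >= 0)
            av_frame_unref(out);                       // unref'd, yet memory keeps growing
    }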

The code is attached.
#include <iostream>
#include <string>
#include <vector>
#include <queue>
#include <mutex>
#include <condition_variable>
#include <thread>
#include <fstream>
#include <sstream>
#include <unistd.h>
#include <cmath>


extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavfilter/avfilter.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/opt.h>
#include <libavutil/avassert.h>
#include <libavutil/hwcontext.h>
}

static const int NUM_VIDEOS = 4;
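// NOTE: these flags are read and written from multiple threads without any
// synchronization; std::atomic<bool> would be safer.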
bool decoding_finished[NUM_VIDEOS] = {false};
static bool processing_finished = false;
std::condition_variable cv[NUM_VIDEOS]; // Condition variables for each video

static AVBufferRef *hw_device_ctx[NUM_VIDEOS] = {NULL};
static AVBufferRef *hw_device_ctx_enc = NULL;

// Error handling function
void check_error(int ret, const char* msg) 
{
    if (ret < 0) 
    {
        std::cerr << "Error: " << msg << std::endl;
        exit(1);
    }
}

static int hw_decoder_init(AVCodecContext *ctx, const enum AVHWDeviceType type, const char *device, int index)
{
    int err = 0;
    if ((err = av_hwdevice_ctx_create(&hw_device_ctx[index], type, device, NULL, 0)) < 0)
    {
        fprintf(stderr, "Failed to create the specified HW device.\n");
        return err;
    }

    ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx[index]);
    return err;
}

static enum AVPixelFormat get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat* pix_fmts)
{
    const enum AVPixelFormat *p;
    for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++)
        //if (*p == AV_PIX_FMT_CUDA)
        if (*p == AV_PIX_FMT_NV12)
            return *p;

    fprintf(stderr,"Failed to get HW surface format.\n");
    return AV_PIX_FMT_NONE;
}

static int set_hwframe_ctx(AVCodecContext *ctx, int width, int height, AVBufferRef *hw_device_ctx)
{
    AVBufferRef *hw_frames_ref;
    AVHWFramesContext *frames_ctx = NULL;
    int err = 0;

    if (!(hw_frames_ref = av_hwframe_ctx_alloc(hw_device_ctx))) {
        fprintf(stderr, "Failed to create cuda frame context.\n");
        return -1;
    }

    frames_ctx = (AVHWFramesContext *)(hw_frames_ref->data);
    frames_ctx->format    = AV_PIX_FMT_CUDA;
    frames_ctx->sw_format = AV_PIX_FMT_NV12;
    frames_ctx->width     = width;
    frames_ctx->height    = height;
    frames_ctx->initial_pool_size = 20;
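    // Preallocate 20 CUDA surfaces for this frames context (it is attached only
    // to the encoder below, which is never actually opened in this test).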
    if ((err = av_hwframe_ctx_init(hw_frames_ref)) < 0) {
        fprintf(stderr, "Failed to initialize cuda frame context."
                "Error code: %d\n",err);
        av_buffer_unref(&hw_frames_ref);
        return err;
    }

    ctx->hw_frames_ctx = av_buffer_ref(hw_frames_ref);
    if (!ctx->hw_frames_ctx)
        err = AVERROR(ENOMEM);

    av_buffer_unref(&hw_frames_ref);
    return err;
}


// Function to decode frames
void decode_frames(int index, AVFormatContext* input_ctx, AVCodecContext* codec_ctx, int videoStreamIndex, std::mutex& mutex, std::queue<AVFrame*>& queues, int expected_reading_frames) 
{
    AVPacket pkt;
    int ret = 0;
    int read_frames = 0;

    while (!decoding_finished[index] && (false == processing_finished)) 
    {
        ret = av_read_frame(input_ctx, &pkt);
        if (ret == AVERROR_EOF)
        {
            decoding_finished[index] = true;

            printf("%d finish read packet\n", index);

            {
                std::unique_lock<std::mutex> lock(mutex);
                queues.push(nullptr); // EOF sentinel; push under the lock to avoid racing the consumer
            }
            cv[index].notify_one(); // Notify processing thread
            break;
        } 
        else if (ret < 0) 
        {
            std::cerr << "Error reading frame" << std::endl;
            break;
        }

/*
        if (read_frames >= expected_reading_frames)
        {
            decoding_finished[index] = true;

            printf("%d finish read packet(read frames greater than nedded, read_frames=%d, expected_reading_frames=%d)\n", index, read_frames, expected_reading_frames);

            queues.push(nullptr);
            cv[index].notify_one(); // Notify processing thread
            break;            
        }
*/
        if (videoStreamIndex == pkt.stream_index)
        {
            read_frames++;

            // Send packet to the decoder
            ret = avcodec_send_packet(codec_ctx, &pkt);
            if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) 
            {
                std::cerr << "Error sending packet for decoding" << std::endl;
                break;
            }

            // Receive decoded frame from the decoder
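            // (note: only one avcodec_receive_frame per packet; decoders can buffer
            // more than one frame per packet, so a receive loop would be more robust)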
            AVFrame* frame = av_frame_alloc();
            if (!frame) 
            {
                std::cerr << "Error allocating frame" << std::endl;
                break;
            }

            ret = avcodec_receive_frame(codec_ctx, frame);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) 
            {
                av_frame_free(&frame);
                continue;
            } 
            else if (ret < 0) 
            {
                std::cerr << "Error receiving frame after decoding" << std::endl;
                av_frame_free(&frame);
                break;
            }

            {
                std::unique_lock<std::mutex> lock(mutex);
                //printf("decode notify_one\n");
                queues.push(frame);
                cv[index].notify_one();
            }
        }

        av_packet_unref(&pkt);
    }

    printf("break decode_frames, index = %d\n", index);
}


// Function to process frames and encode
void process_frames(int index, std::mutex& mutex, std::queue<AVFrame*>& queues, AVFilterContext* buffersrc_ctx) {
    AVFrame* frame = nullptr;

    // Process frames and encode
    while (!processing_finished)
    {
        {
            std::unique_lock<std::mutex> lock(mutex);
            // also return on shutdown so this thread cannot block forever
            cv[index].wait(lock, [&]{ return !queues.empty() || processing_finished; });
        }

        // Check if processing finished
        if (processing_finished && queues.empty())
            break;

        {
            std::lock_guard<std::mutex> lock(mutex);
            if (!queues.empty())
            {
                frame = queues.front();
                queues.pop();
            } 
            else 
            {
                continue;
            }
        }

        //std::cout<<"begin av_buffersrc_write_frame"<<", index: "<<index<<std::endl;

        // Send the frame to the input of the filter graph
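        // (av_buffersrc_write_frame does not take ownership: it adds its own
        // reference, so the frame data stays alive inside the graph until it is
        // pulled out through the buffersink)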
        check_error(av_buffersrc_write_frame(buffersrc_ctx, frame), "Error sending frame to filter graph");

        if (frame == nullptr)
        {
            std::cout<<"break process_frames"<<", index: "<<index<<std::endl;
            break;
        }

        // Free the frame
        av_frame_free(&frame);
    }
}

void save_frame_as_yuv(AVFrame* frame, const std::string& filename) 
{
    static int cnt = 0;
    // Check the frame format and append the raw planes to the file
    std::ofstream yuv_file(filename, std::ios::out | std::ios::binary | std::ios::app);
    if (!yuv_file.is_open())
    {
        std::cerr << "Error opening file for writing: " << filename << std::endl;
        return;
    }
    if (frame->format == 121) // hardcoded pixel-format value from this FFmpeg build (Y plane + interleaved UV, NV12-style layout)
    {
        // Write Y component
        for (int y = 0; y < frame->height; ++y)
        {
            yuv_file.write(reinterpret_cast<char *>(frame->data[0] + y * frame->linesize[0]), frame->width);
        }

        // Write UV component interleaved
        for (int y = 0; y < frame->height / 2; ++y)
        {
            yuv_file.write(reinterpret_cast<char *>(frame->data[1] + y * frame->linesize[1]), frame->width);
        }
    }
    else if (frame->format == AV_PIX_FMT_NV12)
    {
        // NOTE: this branch writes three separate planes (YUV420P-style layout),
        // but NV12 frames have only two planes, so frame->data[2] is not valid here
        // Write Y component
        for (int y = 0; y < frame->height; ++y)
        {
            yuv_file.write(reinterpret_cast<char *>(frame->data[0] + y * frame->linesize[0]), frame->width);
        }

        // Write U component
        for (int y = 0; y < frame->height / 2; ++y)
        {
            yuv_file.write(reinterpret_cast<char *>(frame->data[1] + y * frame->linesize[1]), frame->width / 2);
        }
        // Write V component
        for (int y = 0; y < frame->height / 2; ++y)
        {
            yuv_file.write(reinterpret_cast<char *>(frame->data[2] + y * frame->linesize[2]), frame->width / 2);
        }
    }
    else 
    {
        std::cerr << "Error: Unsupported frame format: " << frame->format << std::endl; 
    }

    yuv_file.close();

    cnt++;
    printf("write %d frames to yuv file\n", cnt);
}

int main(int argc, char **argv) 
{
    int stop_duration = 60;
    int fps = 25;
    char fps_str[64] = {0};

    if (3 == argc)
    {
        std::string str1 = argv[1];
        fps = std::stoi(str1);

        std::string str2 = argv[2];
        stop_duration = std::stoi(str2);
    }

    printf("stop_duration = %d, fps = %d\n", stop_duration, fps);
    const std::string video_name = "/home/zqiu/video/Bosphorus.mp4";
    std::string input_files[NUM_VIDEOS];
    for (int i = 0; i < NUM_VIDEOS; ++i) {
        // every input uses the same source file
        input_files[i] = video_name;
    }

    

    // Open input videos
    AVFormatContext* input_ctx[NUM_VIDEOS] = {nullptr};
    AVCodecContext *codecContext[NUM_VIDEOS] = { nullptr };
    AVFormatContext* output_ctx = nullptr;
    int ret = -1;

    AVFilterContext* src_ctx[NUM_VIDEOS] = {nullptr};
    AVFilterContext* format_ctx[NUM_VIDEOS] = {nullptr};
    AVFilterContext* scale_ctx[NUM_VIDEOS] = {nullptr};
    AVFilterContext* fps_ctx[NUM_VIDEOS] = {nullptr};
    AVFilterContext* tpad_ctx[NUM_VIDEOS] = {nullptr};
    AVFilterContext* hwupload_ctx[NUM_VIDEOS] = {nullptr};
    AVFilterContext* overlay_ctx[NUM_VIDEOS] = {nullptr};
    AVFilterContext* final_fps_ctx = nullptr;
    AVFilterContext* final_scale_ctx = nullptr;
    AVFilterContext* buffersink_ctx = nullptr;
    AVFilterContext* nullsrc_ctx = nullptr;
    AVFilterContext* base_hwupload_ctx = nullptr;

    int expected_reading_frames[NUM_VIDEOS] = {0};
    int expected_padding_duration[NUM_VIDEOS] = {0};

    snprintf(fps_str, sizeof(fps_str), "%d", fps);

    enum AVHWDeviceType type;

    /*check hardware device type is supported */
    type = av_hwdevice_find_type_by_name("cuda");
    if(type == AV_HWDEVICE_TYPE_NONE)
    {
        fprintf(stderr, "Device type %s is not supported.\n", "cuda");
        fprintf(stderr, "Available device types:");
        while((type = av_hwdevice_iterate_types(type)) != AV_HWDEVICE_TYPE_NONE)
            fprintf(stderr, " %s",av_hwdevice_get_type_name(type));

        fprintf(stderr,"\n");
        return -1;
    }

    av_assert0(type == AV_HWDEVICE_TYPE_CUDA);
    
    // Create filter graph
    AVFilterGraph* graph = avfilter_graph_alloc();
    check_error(graph ? 0 : AVERROR(ENOMEM), "Could not allocate filter graph");

    int videoStreamIndex[NUM_VIDEOS] = {-1};

    for (int i = 0; i < NUM_VIDEOS; ++i) 
    {
        ret = avformat_open_input(&input_ctx[i], input_files[i].c_str(), nullptr, nullptr);
        printf("*****ret = %d\n", ret);
        check_error(ret, "***Could not open input file");

        if (input_ctx[i]->duration != AV_NOPTS_VALUE) 
        {
            int64_t duration = input_ctx[i]->duration;
            int64_t seconds = duration / AV_TIME_BASE;

            expected_padding_duration[i] = stop_duration - seconds;

            printf("%s, duration: %ld seconds, expected_padding_duration:%d\n", input_files[i].c_str(), seconds, expected_padding_duration[i]);
        } 
        else 
        {
            printf("Duration not available\n");
        }

        ret = avformat_find_stream_info(input_ctx[i], nullptr);
        check_error(ret, "Could not find stream information");

        const AVCodec *codec = nullptr;
        videoStreamIndex[i] = av_find_best_stream(input_ctx[i], AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
        if (videoStreamIndex[i] < 0) 
        {
            std::cerr << "Error finding video stream in file " << input_files[i] << std::endl;
            return 1;
        }

        switch (input_ctx[i]->streams[videoStreamIndex[i]]->codecpar->codec_id)
        {
            case AV_CODEC_ID_H264:
                codec = avcodec_find_decoder_by_name("h264_cuvid");
                break;
            case AV_CODEC_ID_HEVC:
                codec = avcodec_find_decoder_by_name("hevc_cuvid");
                break;    
            case AV_CODEC_ID_MJPEG:
                codec = avcodec_find_decoder_by_name("mjpeg_cuvid");
                break;    
            case AV_CODEC_ID_AV1:
                codec = avcodec_find_decoder_by_name("av1_cuvid");
                break;   
            default:
                std::cerr<<"unsupported codec id:"<<input_ctx[i]->streams[videoStreamIndex[i]]->codecpar->codec_id<<std::endl;
                return AVERROR(ENAVAIL);

        }

        codecContext[i] = avcodec_alloc_context3(codec);
        avcodec_parameters_to_context(codecContext[i], input_ctx[i]->streams[videoStreamIndex[i]]->codecpar);

        codecContext[i]->get_format = get_hw_format;
        codecContext[i]->framerate = av_guess_frame_rate(input_ctx[i], input_ctx[i]->streams[videoStreamIndex[i]], NULL);
        codecContext[i]->pkt_timebase = input_ctx[i]->streams[videoStreamIndex[i]]->time_base;
        codecContext[i]->thread_count = 1;

        if (hw_decoder_init(codecContext[i], type, "0", i) < 0)
        {
            std::cerr<<"hw_decoder_init failed"<<std::endl;
            return -1;
        }

        ret = avcodec_open2(codecContext[i], codec, nullptr);
        check_error(ret, "Error opening codec");

        expected_padding_duration[i] *= (input_ctx[i]->streams[videoStreamIndex[i]]->avg_frame_rate.num/input_ctx[i]->streams[videoStreamIndex[i]]->avg_frame_rate.den); 
        expected_padding_duration[i] /= fps;

        expected_reading_frames[i] = stop_duration *input_ctx[i]->streams[videoStreamIndex[i]]->avg_frame_rate.num/input_ctx[i]->streams[videoStreamIndex[i]]->avg_frame_rate.den;

        printf("~~~~ i=%d, num:%d, den:%d, expected_reading_frames: %d, expected_padding_duration=%d\n", i, input_ctx[i]->streams[videoStreamIndex[i]]->avg_frame_rate.num, 
                                    input_ctx[i]->streams[videoStreamIndex[i]]->avg_frame_rate.den,
                                    expected_reading_frames[i],
                                    expected_padding_duration[i]);

        //printf("i=%d, num:%d, den:%d\n", i, input_ctx[i]->streams[0]->avg_frame_rate.num, input_ctx[i]->streams[0]->avg_frame_rate.den);
    }

    // Create input filter contexts
    for (int i = 0; i < NUM_VIDEOS; ++i) 
    {
        AVStream* stream = input_ctx[i]->streams[videoStreamIndex[i]];

        std::string args = "video_size=1920x1080:pix_fmt=nv12:time_base=" +
                           std::to_string(stream->time_base.num) + "/" + std::to_string(stream->time_base.den) +
                           ":pixel_aspect=1/1" + ":frame_rate=" +
                            std::to_string(stream->avg_frame_rate.num) + "/" +
                            std::to_string(stream->avg_frame_rate.den);

        check_error(avfilter_graph_create_filter(&src_ctx[i], avfilter_get_by_name("buffer"), "in", args.c_str(),
                                                 nullptr, graph),
                    "Failed to create buffer source");


        // Compute the grid dimensions dynamically
        int num_cols = static_cast<int>(std::ceil(std::sqrt(NUM_VIDEOS)));
        int num_rows = static_cast<int>(std::ceil(static_cast<double>(NUM_VIDEOS) / num_cols));

        // Compute each tile's width and height
        int frame_width = 1920 / num_cols;
        int frame_height = 1080 / num_rows;

        // Compute this tile's position in the grid
        int x = (i % num_cols) * frame_width;
        int y = (i / num_cols) * frame_height;

        printf("scale set num_rows = %d, num_cols = %d, x = %d, y = %d\n", num_rows, num_cols, x, y);

        // Build the scale and overlay filter arguments
        std::string scale_filter = "w=" + std::to_string(frame_width) + ":h=" + std::to_string(frame_height);

        std::string overlay_filter = "shortest=1:x=" + std::to_string(x) + ":y=" + std::to_string(y);


        // Create a format filter to convert nv12 to yuv420p
        check_error(avfilter_graph_create_filter(&format_ctx[i], avfilter_get_by_name("format"), ("format_" + std::to_string(i)).c_str(), "yuv420p", nullptr, graph),
                    "Failed to create format filter");

        // Create scale filter context
        check_error(avfilter_graph_create_filter(&scale_ctx[i], avfilter_get_by_name("scale_cuda"), 
                                                    ("scale_" + std::to_string(i)).c_str(), scale_filter.c_str(), nullptr,
                                                    graph),
                                                "Failed to create scale filter");

        check_error(avfilter_graph_create_filter(&overlay_ctx[i], avfilter_get_by_name("overlay_cuda"), ("overlay_" + std::to_string(i)).c_str(), overlay_filter.c_str(), nullptr, graph),
                    "Failed to create overlay filter");

        // Create fps filter context
        check_error(avfilter_graph_create_filter(&fps_ctx[i], avfilter_get_by_name("fps"), ("fps_" + std::to_string(i)).c_str(), fps_str, nullptr, graph),
                    "Failed to create fps filter");

        if (expected_padding_duration[i] > 0)
        {
            char tpad_str[256] = {0};
            snprintf(tpad_str, sizeof(tpad_str), "stop_mode=clone:stop_duration=%d", expected_padding_duration[i]);
            // Create tpad filter context
            check_error(avfilter_graph_create_filter(&tpad_ctx[i], 
                                                        avfilter_get_by_name("tpad"), ("tpad_" + std::to_string(i)).c_str(),
                                                        //"stop_mode=clone:stop_duration=60", nullptr, graph),
                                                        tpad_str, nullptr, graph),
                                                        "Failed to create tpad filter");
        }

        // Create hwupload_cuda filter context
        check_error(avfilter_graph_create_filter(&hwupload_ctx[i], avfilter_get_by_name("hwupload_cuda"), ("hwupload_cuda_" + std::to_string(i)).c_str(), nullptr, nullptr, graph),
                    "Failed to create hwupload_cuda filter");            

        // Link filter contexts
        check_error(avfilter_link(src_ctx[i], 0, fps_ctx[i], 0), "Failed to link source to fps filter");

        check_error(avfilter_link(fps_ctx[i], 0, format_ctx[i], 0), "Failed to link fps to format filter");

        if (expected_padding_duration[i] > 0)
        {
            check_error(avfilter_link(format_ctx[i], 0, tpad_ctx[i], 0), "Failed to link format to tpad filter");
            check_error(avfilter_link(tpad_ctx[i], 0, hwupload_ctx[i], 0), "Failed to link tpad to hwupload filter");
        }
        else
        {
            check_error(avfilter_link(format_ctx[i], 0, hwupload_ctx[i], 0), "Failed to link format to hwupload filter");
        }

        check_error(avfilter_link(hwupload_ctx[i], 0, scale_ctx[i], 0), "Failed to link hwupload to scale filter");
    }

    // Create nullsrc filter context
    check_error(avfilter_graph_create_filter(&nullsrc_ctx, avfilter_get_by_name("nullsrc"), "base", "1920x1080", nullptr, graph),
                "Failed to create nullsrc filter");

    // Create hwupload_cuda filter context
    check_error(avfilter_graph_create_filter(&base_hwupload_ctx, avfilter_get_by_name("hwupload_cuda"), "hwupload_cuda_base", nullptr, nullptr, graph),
                    "Failed to create hwupload_cuda(base) filter"); 

    check_error(avfilter_link(nullsrc_ctx, 0, base_hwupload_ctx, 0), "Failed to link nullsrc to overlay");                               

    // Create overlay filters for each input
    #if 0
    for (int i = 0; i < NUM_VIDEOS; ++i) 
    {
        //std::string overlay_filter = "shortest=1:x=" + std::to_string((i % 2) * 960) + ":y=" + std::to_string((i / 2) * 540);
        std::cout<<"i = "<<i<<", filter: "<<overlay_filter<<std::endl;
        check_error(avfilter_graph_create_filter(&overlay_ctx[i], avfilter_get_by_name("overlay_cuda"), ("overlay_" + std::to_string(i)).c_str(), overlay_filter.c_str(), nullptr, graph),
                    "Failed to create overlay filter");
    }
    #endif

    // Link the uploaded base frame to the first overlay input
    check_error(avfilter_link(base_hwupload_ctx, 0, overlay_ctx[0], 0), "Failed to link base_hwupload_ctx to overlay");
    
    check_error(avfilter_link(scale_ctx[0], 0, overlay_ctx[0], 1), "Failed to link scale to overlay..");

    for (int i = 1; i < NUM_VIDEOS; ++i) 
    {
        check_error(avfilter_link(overlay_ctx[i - 1], 0, overlay_ctx[i], 0), "Failed to link overlay");
        check_error(avfilter_link(scale_ctx[i], 0, overlay_ctx[i], 1), "Failed to link scale to overlay...");
    }

    // Create fps filter context
    check_error(avfilter_graph_create_filter(&final_fps_ctx, avfilter_get_by_name("fps"), "final_fps", fps_str, nullptr, graph),
                    "Failed to create final fps filter");

    // Create final scale filter context
    check_error(avfilter_graph_create_filter(&final_scale_ctx, avfilter_get_by_name("scale_cuda"), "final_scale", "w=1920:h=1080", nullptr, graph),
                "Failed to create final scale filter");

    // Create final scale filter context
    //check_error(avfilter_graph_create_filter(&format_ctx, avfilter_get_by_name("format"), "final_format", "nv12", nullptr, graph),
    //            "Failed to create final format filter");

    // Create buffersink filter context
    check_error(avfilter_graph_create_filter(&buffersink_ctx, avfilter_get_by_name("buffersink"), "out", nullptr, nullptr, graph),
                "Failed to create buffer sink");

    // Link the last overlay to the final fps filter
    check_error(avfilter_link(overlay_ctx[NUM_VIDEOS-1], 0, final_fps_ctx, 0), "Failed to link overlay to final fps");
    //check_error(avfilter_link(overlay_ctx[3], 0, final_scale_ctx, 0), "Failed to link overlay to final scale");

    // Link final fps to final scale
    check_error(avfilter_link(final_fps_ctx, 0, final_scale_ctx, 0), "Failed to link final fps to final scale");

    // Link final scale to format
    //check_error(avfilter_link(final_scale_ctx, 0, format_ctx, 0), "Failed to link final scale to format");

    // Link final scale to buffersink
    check_error(avfilter_link(final_scale_ctx, 0, buffersink_ctx, 0), "Failed to link final scale to buffersink");

    // Configure the filter graph
    check_error(avfilter_graph_config(graph, nullptr), "Failed to configure filter graph");

    printf("dump graph: %s\n", avfilter_graph_dump(graph, NULL));

    AVFilterLink *outlink = buffersink_ctx->inputs[0];
    AVRational frame_rate = outlink->frame_rate;

    printf("got Frame Rate: %d/%d\n", frame_rate.num, frame_rate.den);

    // Open output file
    /* create a CUDA hardware device context on device 0 */
    ret = av_hwdevice_ctx_create(&hw_device_ctx_enc, AV_HWDEVICE_TYPE_CUDA, "0", NULL, 0);
    if (ret < 0) 
    {
        fprintf(stderr, "--Failed to create a cuda device. Error code: %d\n", ret);
        return -1;
    }

    check_error(avformat_alloc_output_context2(&output_ctx, nullptr, nullptr, "output.mp4"),
                "Failed to allocate output context");
    check_error(output_ctx ? 0 : -1, "Could not create output context");

    // Find the encoder for the codec
    //const AVCodec* encoder = avcodec_find_encoder(output_stream->codecpar->codec_id);
    const AVCodec* encoder = avcodec_find_encoder_by_name("h264_mxenc");
    check_error(encoder ? 0 : -1, "Failed to find encoder");

    // Add video stream to output file
    AVStream* output_stream = avformat_new_stream(output_ctx, encoder);
    check_error(output_stream ? 0 : -1, "Failed to allocate output stream");

    // Create a codec context for encoding
    AVCodecContext* codec_ctx = avcodec_alloc_context3(encoder);
    check_error(codec_ctx ? 0 : -1, "Failed to allocate codec context");

    check_error(set_hwframe_ctx(codec_ctx, 1920, 1080, hw_device_ctx_enc), "Failed to set hw_device_ctx_enc");

    ret = avcodec_parameters_from_context(output_stream->codecpar, codecContext[0]);
    if (ret < 0) {
        std::cerr << "Error copying codec parameters" << std::endl;
        return 1;
    }

    // Open output file for writing
    check_error(avio_open(&output_ctx->pb, "output.mp4", AVIO_FLAG_WRITE), "Failed to open output file");

    // Write file header
    check_error(avformat_write_header(output_ctx, nullptr), "Failed to write file header");

    codec_ctx->bit_rate = 8000000;
    codec_ctx->width = 1920;
    codec_ctx->height = 1080;
    codec_ctx->gop_size = 50;
    //codec_ctx->framerate = (AVRational){fps, 1};
    codec_ctx->time_base = (AVRational){1, fps};
    codec_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
    codec_ctx->max_b_frames = 0;

    av_opt_set(codec_ctx->priv_data, "preset", "medium", 0);
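
    // NOTE: these encoder settings only take effect through avcodec_open2(),
    // which is commented out below, so the encode path is effectively a stub.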

/*
    printf("codec_ctx->bit_rate: %lld\n", codec_ctx->bit_rate);
    printf("codec_ctx->width: %d\n", codec_ctx->width);
    printf("codec_ctx->height: %d\n", codec_ctx->height);
    printf("codec_ctx->gop_size: %d\n", codec_ctx->gop_size);
    printf("codec_ctx->max_b_frames: %d\n", codec_ctx->max_b_frames);
    printf("codec_ctx->pix_fmt: %d\n", codec_ctx->pix_fmt);
*/

    // Open the encoder
    //ret = avcodec_open2(codec_ctx, encoder, nullptr);
    //check_error(ret < 0, "Failed to open encoder");

    // Copy the codec context back to the output stream parameters
    //ret = avcodec_parameters_from_context(output_stream->codecpar, codec_ctx);
    //check_error(ret < 0, "Failed to copy codec parameters to output stream");


    // Create per-video frame queues (default-constructed by the vector)
    std::vector<std::queue<AVFrame*>> queues(NUM_VIDEOS);

    // Create mutex and condition variable for synchronization
    std::vector<std::mutex> mutex(NUM_VIDEOS);

    // Start decoding threads
    std::vector<std::thread> decode_threads;
    for (int i = 0; i < NUM_VIDEOS; ++i) 
    {
        decode_threads.emplace_back(decode_frames, i, input_ctx[i], codecContext[i], videoStreamIndex[i], std::ref(mutex[i]), std::ref(queues[i]), expected_reading_frames[i]);
    }

    // Start processing and encoding threads
    /*
    std::vector<std::thread> process_threads;
    for (int i = 0; i < NUM_VIDEOS; ++i) 
    {
        process_threads.emplace_back(process_frames, i, std::ref(mutex[i]), std::ref(queues[i]), src_ctx[i]);
    }
    */

////////////
    int frame_count = 0;

    AVFrame* frame = av_frame_alloc();
    check_error(frame ? 0 : -1, "Failed to allocate frame");

    AVFrame* frame_decode = nullptr;
    bool need_handle = false;
    int finish_count = 0;
    bool got = false;

    while (true) 
    {
        need_handle = false;
        
        if (finish_count < NUM_VIDEOS)
        {
            for (int i = 0; i < NUM_VIDEOS; i++)
            {
                got = false;

                frame_decode = nullptr;

                {
                    std::lock_guard<std::mutex> lock(mutex[i]);
                    if (!queues[i].empty())
                    {
                        frame_decode = queues[i].front();
                        got = true;
                        queues[i].pop();
                    }
                }

                if (true == got)
                {
                    need_handle = true;

                    check_error(av_buffersrc_write_frame(src_ctx[i], frame_decode), "Error sending frame to filter graph");
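                    // as in process_frames(): the graph keeps its own reference,
                    // so av_frame_free() below only drops this thread's reference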

                    //check_error(av_buffersrc_add_frame_flags(src_ctx[i], frame_decode, AV_BUFFERSRC_FLAG_PUSH), "Error sending frame to filter graph");

                    if (frame_decode == nullptr)
                    {
                        finish_count++;
                        std::cout<<"finish got_frames"<<", index: "<<i<<", finish_count: "<<finish_count<<std::endl;
                        //break;
                    }

                    // Free the frame
                    av_frame_free(&frame_decode); 
                }
            }
        }

        if ((false == need_handle) && (finish_count < NUM_VIDEOS))
        {
            usleep(10*1000);
            printf("need_handle is false, sleep 10ms\n");
            continue;
        }

        bool exit = false;

        while(true)
        {
            ret = av_buffersink_get_frame(buffersink_ctx, frame);
            if (ret == AVERROR(EAGAIN)) 
            {
                // The graph needs more input before it can produce another frame
                //printf("av_buffersink_get_frame return EAGAIN\n");
                //continue;
                if (finish_count == NUM_VIDEOS)
                {
                    exit = true;
                }

                break;
            }
            else if (ret == AVERROR_EOF || frame_count > fps*stop_duration) 
            {
                // Graph fully drained, or the frame limit was reached
                printf("av_buffersink_get_frame returned AVERROR_EOF (or frame limit reached)\n");
                processing_finished = true;
                exit = true;

                break;
            }  
            else if (ret < 0) 
            {
                // Error getting frame
                check_error(ret, "Error getting filtered frame");
            }

            printf("av_buffersink_get_frame got pts = %ld, width: %d, height: %d\n", frame->pts, frame->width, frame->height);

            av_frame_unref(frame);

            frame_count++;
        }

        if (exit == true)
        {
            break;
        }
    }

    // Write trailer to output file
    //av_write_trailer(output_ctx);

    av_frame_free(&frame);

////////////

    // Join decoding threads
    for (auto& thread : decode_threads) 
    {
        thread.join();
    }

    // Clean up
    for (int i = 0; i < NUM_VIDEOS; ++i) 
    {
        avcodec_free_context(&codecContext[i]);
        avformat_close_input(&input_ctx[i]);
    }

    //avfilter_graph_free(&graph);
    //avcodec_free_context(&codec_ctx);
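    // NOTE: the filter graph and the encoder codec context are never freed here.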
    avformat_free_context(output_ctx);

    return 0;
}