[FFmpeg-devel] [PATCH] Add NVENC encoder

Timo Rothenpieler Wed, 26 Nov 2014 02:23:05 -0800

This patch adds support for encoding with Nvidia NVENC on Windows and Linux.

I'm not sure if this needs to be flagged as nonfree. As far as I'm aware, it should not affect how the resulting binaries can be redistributed.

The only dependency is the nvEncodeAPI.h header from the NVENC SDK, which can be downloaded from Nvidia:


https://developer.nvidia.com/nvidia-video-codec-sdk

The header is somewhat hidden in the Samples directory.

I attached the current version of the patch.
It can also be found on my github fork:

https://github.com/BtbN/FFmpeg/tree/nvenc

From e59915fc771ca1c5afd0c5c7ed04227933206a39 Mon Sep 17 00:00:00 2001
From: Timo Rothenpieler <t...@rothenpieler.org>
Date: Wed, 26 Nov 2014 11:08:11 +0100
Subject: [PATCH] Add NVENC encoder

---
 configure               |   11 +-
 libavcodec/Makefile     |    1 +
 libavcodec/allcodecs.c  |    1 +
 libavcodec/nvenc.c      | 1015 +++++++++++++++++++++++++++++++++++++++++++++++
 libavcodec/nvenc.h      |   25 ++
 libavcodec/nvenc_api.c  |  330 +++++++++++++++
 libavcodec/nvenc_api.h  |   38 ++
 libavcodec/nvenc_cuda.h |   62 +++
 8 files changed, 1481 insertions(+), 2 deletions(-)
 create mode 100644 libavcodec/nvenc.c
 create mode 100644 libavcodec/nvenc.h
 create mode 100644 libavcodec/nvenc_api.c
 create mode 100644 libavcodec/nvenc_api.h
 create mode 100644 libavcodec/nvenc_cuda.h

diff --git a/configure b/configure
index 38619c4..d0b790c 100755
--- a/configure
+++ b/configure
@@ -261,6 +261,7 @@ External library support:
   --enable-libzvbi         enable teletext support via libzvbi [no]
   --disable-lzma           disable lzma [autodetect]
   --enable-decklink        enable Blackmagick DeckLink I/O support [no]
+  --enable-nvenc           enable NVIDIA NVENC support [no]
   --enable-openal          enable OpenAL 1.1 capture support [no]
   --enable-opencl          enable OpenCL code
   --enable-opengl          enable OpenGL rendering [no]
@@ -1393,6 +1394,7 @@ EXTERNAL_LIBRARY_LIST="
     libzmq
     libzvbi
     lzma
+    nvenc
     openal
     opencl
     opengl
@@ -2389,6 +2391,7 @@ libxvid_encoder_deps="libxvid"
 libutvideo_decoder_deps="libutvideo"
 libutvideo_encoder_deps="libutvideo"
 libzvbi_teletext_decoder_deps="libzvbi"
+nvenc_encoder_deps="nvenc"
 
 # demuxers / muxers
 ac3_demuxer_select="ac3_parser"
@@ -2569,9 +2572,7 @@ drawtext_filter_deps="libfreetype"
 ebur128_filter_deps="gpl"
 flite_filter_deps="libflite"
 frei0r_filter_deps="frei0r dlopen"
-frei0r_filter_extralibs='$ldl'
 frei0r_src_filter_deps="frei0r dlopen"
-frei0r_src_filter_extralibs='$ldl'
 geq_filter_deps="gpl"
 histeq_filter_deps="gpl"
 hqdn3d_filter_deps="gpl"
@@ -4650,6 +4651,11 @@ elif check_func dlopen -ldl; then
     ldl=-ldl
 fi
 
+# set a few flags which depend on ldl and can't be set earlier
+nvenc_encoder_extralibs='$ldl'
+frei0r_filter_extralibs='$ldl'
+frei0r_src_filter_extralibs='$ldl'
+
 if ! disabled network; then
     check_func getaddrinfo $network_extralibs
     check_func getservbyport $network_extralibs
@@ -4913,6 +4919,7 @@ enabled libxavs           && require libxavs xavs.h xavs_encoder_encode -lxavs
 enabled libxvid           && require libxvid xvid.h xvid_global -lxvidcore
 enabled libzmq            && require_pkg_config libzmq zmq.h zmq_ctx_new
 enabled libzvbi           && require libzvbi libzvbi.h vbi_decoder_new -lzvbi
+enabled nvenc             && { check_header nvEncodeAPI.h || die "ERROR: nvEncodeAPI.h not found."; }
 enabled openal            && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32"; do
                                check_lib 'AL/al.h' alGetError "${al_libs}" && break; done } ||
                                die "ERROR: openal not found"; } &&
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index fa0f53d..cc41564 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -347,6 +347,7 @@ OBJS-$(CONFIG_MXPEG_DECODER)           += mxpegdec.o
 OBJS-$(CONFIG_NELLYMOSER_DECODER)      += nellymoserdec.o nellymoser.o
 OBJS-$(CONFIG_NELLYMOSER_ENCODER)      += nellymoserenc.o nellymoser.o
 OBJS-$(CONFIG_NUV_DECODER)             += nuv.o rtjpeg.o
+OBJS-$(CONFIG_NVENC_ENCODER)           += nvenc.o nvenc_api.o
 OBJS-$(CONFIG_ON2AVC_DECODER)          += on2avc.o on2avcdata.o
 OBJS-$(CONFIG_OPUS_DECODER)            += opusdec.o opus.o opus_celt.o \
                                           opus_imdct.o opus_silk.o     \
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 0d39d33..8ceee2f 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -223,6 +223,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER(MVC2,              mvc2);
     REGISTER_DECODER(MXPEG,             mxpeg);
     REGISTER_DECODER(NUV,               nuv);
+    REGISTER_ENCODER(NVENC,             nvenc);
     REGISTER_DECODER(PAF_VIDEO,         paf_video);
     REGISTER_ENCDEC (PAM,               pam);
     REGISTER_ENCDEC (PBM,               pbm);
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
new file mode 100644
index 0000000..d659cf8
--- /dev/null
+++ b/libavcodec/nvenc.c
@@ -0,0 +1,1015 @@
+/*
+ * H.264 hardware encoding using nvidia nvenc
+ * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+//#pragma optimize("", off)
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+#include "libavutil/internal.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/mem.h"
+#include "avcodec.h"
+#include "internal.h"
+
+#include "nvenc_cuda.h"
+#include "nvenc_api.h"
+
+/**
+ * One NVENC input buffer plus the bookkeeping the encoder keeps for it.
+ */
+typedef struct NvencInputSurface
+{
+    /* Handle returned by nvEncCreateInputBuffer (see nvenc_encode_init). */
+    NV_ENC_INPUT_PTR inputSurface;
+    /* Allocated dimensions: the frame size rounded up to a multiple of 32. */
+    int width;
+    int height;
+
+    /* 0 while the surface is free; set to 1 when nvenc_encode_frame picks it
+     * for a frame and decremented once the matching output was processed. */
+    int lockCount;
+
+    /* Buffer format chosen from avctx->pix_fmt when the surface was created. */
+    NV_ENC_BUFFER_FORMAT format;
+} NvencInputSurface;
+
+/**
+ * One NVENC bitstream (output) buffer plus its bookkeeping.
+ */
+typedef struct NvencOutputSurface
+{
+    /* Handle returned by nvEncCreateBitstreamBuffer (see nvenc_encode_init). */
+    NV_ENC_OUTPUT_PTR outputSurface;
+    /* Size in bytes requested for the bitstream buffer. */
+    int size;
+
+    /* Input surface whose encoded data ends up in this buffer; assigned in
+     * nvenc_encode_frame right before the picture is submitted. */
+    NvencInputSurface *inputSurface;
+
+    /* Non-zero while the buffer sits in one of the output queues; cleared
+     * after its content has been copied into a packet. */
+    int busy;
+} NvencOutputSurface;
+
+/**
+ * Node of a singly linked FIFO of output surfaces
+ * (see out_surf_queue_push / out_surf_queue_pop).
+ * The node does not own the surface it points to.
+ */
+typedef struct NvencOutputSurfaceList
+{
+    NvencOutputSurface *surface;
+    struct NvencOutputSurfaceList *next;
+} NvencOutputSurfaceList;
+
+/**
+ * Node of a singly linked list of timestamps kept in ascending order
+ * (see timestamp_list_insert_sorted / timestamp_list_get_lowest).
+ */
+typedef struct NvencTimestampList
+{
+    int64_t timestamp;
+    struct NvencTimestampList *next;
+} NvencTimestampList;
+
+/**
+ * Private encoder state, stored in AVCodecContext.priv_data.
+ */
+typedef struct NvencContext
+{
+    /* NOTE(review): presumably required as first member for the AVOptions
+     * machinery; the option table is outside the visible part of the file. */
+    AVClass *avclass;
+
+    /* Parameters handed to nvEncInitializeEncoder; initEncodeParams.encodeConfig
+     * points at encodeConfig below. */
+    NV_ENC_INITIALIZE_PARAMS initEncodeParams;
+    NV_ENC_CONFIG encodeConfig;
+    /* CUDA context the encode session was opened on. */
+    CUcontext cuContext;
+
+    /* Number of entries in inputSurfaces and outputSurfaces. */
+    int maxSurfaceCount;
+    NvencInputSurface *inputSurfaces;
+    NvencOutputSurface *outputSurfaces;
+
+    /* Surfaces submitted while the encoder still reported NEED_MORE_INPUT. */
+    NvencOutputSurfaceList *outputSurfaceQueue;
+    /* Surfaces whose bitstream can be fetched, in submission order. */
+    NvencOutputSurfaceList *outputSurfaceReadyQueue;
+    /* Sorted list of input pts values, consumed to derive packet dts. */
+    NvencTimestampList *timestampList;
+    /* dts of the last emitted packet, AV_NOPTS_VALUE before the first one. */
+    int64_t lastDts;
+
+    /* Encode session handle returned by nvEncOpenEncodeSessionEx. */
+    void *nvencoder;
+
+    /* User settings read by nvenc_encode_init.
+     * NOTE(review): presumably filled in through AVOptions - confirm against
+     * the option table, which is not part of the visible chunk. */
+    /* "high", "main" or "baseline"; derived from avctx->profile when unset. */
+    char *profile;
+    /* "hp", "hq", "bd", "ll", "llhp", "llhq" or "default". */
+    char *preset;
+    /* Constant QP used for I/P/B frames when >= 0 and cbr is not set. */
+    int cqp;
+    /* Non-zero selects constant bitrate rate control. */
+    int cbr;
+    /* Together with cbr: 1 selects the two-pass quality rate control mode. */
+    int twopass;
+    /* When >= 0, frameIntervalP is forced to 1. */
+    int gobpattern;
+} NvencContext;
+
+/* All-zero GUID used as the initial value of the client key that is passed to
+ * nvEncOpenEncodeSessionEx through clientKeyPtr. */
+static const GUID dummy_license = { 0x0, 0x0, 0x0, { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 } };
+
+/**
+ * Append a surface to the tail of an output-surface queue.
+ *
+ * @param head    address of the queue head pointer; updated when the queue
+ *                was empty
+ * @param surface surface to enqueue (not owned by the queue)
+ *
+ * The result of malloc is not checked.
+ */
+static void out_surf_queue_push(NvencOutputSurfaceList** head, NvencOutputSurface *surface)
+{
+    NvencOutputSurfaceList *node = (NvencOutputSurfaceList*)malloc(sizeof(NvencOutputSurfaceList));
+    NvencOutputSurfaceList **link = head;
+
+    node->surface = surface;
+    node->next = 0;
+
+    /* Walk to the link that terminates the list and hook the node in there. */
+    while (*link)
+        link = &(*link)->next;
+
+    *link = node;
+}
+
+/**
+ * Remove the first entry of an output-surface queue.
+ *
+ * @param head address of the queue head pointer; advanced to the next node
+ * @return the surface stored in the removed node, or 0 if the queue is empty
+ */
+static NvencOutputSurface *out_surf_queue_pop(NvencOutputSurfaceList** head)
+{
+    NvencOutputSurfaceList *first = *head;
+    NvencOutputSurface *surface = 0;
+
+    if (first)
+    {
+        surface = first->surface;
+        *head = first->next;
+        free(first);
+    }
+
+    return surface;
+}
+
+/**
+ * Insert a timestamp into a list that is kept in ascending order.
+ *
+ * Equal timestamps are placed after the ones already present.
+ *
+ * @param head      address of the list head pointer; updated when the new
+ *                  element becomes the first one
+ * @param timestamp value to insert
+ *
+ * The result of malloc is not checked.
+ */
+static void timestamp_list_insert_sorted(NvencTimestampList** head, int64_t timestamp)
+{
+    NvencTimestampList **link = head;
+    NvencTimestampList *node;
+
+    /* Skip every element that has to stay in front of the new one. */
+    while (*link && (*link)->timestamp <= timestamp)
+        link = &(*link)->next;
+
+    node = (NvencTimestampList*)malloc(sizeof(NvencTimestampList));
+    node->timestamp = timestamp;
+    node->next = *link;
+
+    *link = node;
+}
+
+/**
+ * Remove the first (lowest) timestamp from a sorted timestamp list.
+ *
+ * @param head address of the list head pointer; advanced to the next node
+ * @return the removed timestamp, or 0 if the list is empty
+ */
+static int64_t timestamp_list_get_lowest(NvencTimestampList** head)
+{
+    NvencTimestampList *first = *head;
+    int64_t lowest = 0;
+
+    if (first)
+    {
+        lowest = first->timestamp;
+        *head = first->next;
+        free(first);
+    }
+
+    return lowest;
+}
+
+/**
+ * Open an NVENC H.264 encode session and allocate all per-encoder resources.
+ *
+ * Loads the NVENC/CUDA libraries, creates a CUDA context, opens the encode
+ * session, applies the preset plus the rate-control, profile and VUI settings
+ * taken from avctx and the private options, creates maxSurfaceCount pairs of
+ * input and bitstream buffers and, when CODEC_FLAG_GLOBAL_HEADER is set,
+ * stores the SPS/PPS returned by the encoder in avctx->extradata.
+ *
+ * @param avctx codec context; avctx->priv_data is the NvencContext
+ * @return 0 on success, AVERROR(ENOMEM) when an allocation fails and
+ *         AVERROR_EXTERNAL on any other failure. On failure everything
+ *         acquired up to that point is released again.
+ */
+static av_cold int nvenc_encode_init(AVCodecContext *avctx)
+{
+    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS stEncodeSessionParams = { 0 };
+    NV_ENC_PRESET_CONFIG presetConfig = { 0 };
+    CUcontext cuContextCurr;
+    GUID encoderPreset = NV_ENC_PRESET_HQ_GUID;
+    GUID license = dummy_license;
+    NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
+    int surfaceCount = 0;
+    int i, numMBs;
+    int isLL = 0;
+    int res = AVERROR_EXTERNAL;
+
+    NvencContext *ctx = avctx->priv_data;
+
+    if (!nvenc_dyload_nvenc(avctx))
+        return AVERROR_EXTERNAL;
+
+    /* Reset everything the error path looks at before anything can fail. */
+    memset(&ctx->initEncodeParams, 0, sizeof(NV_ENC_INITIALIZE_PARAMS));
+    memset(&ctx->encodeConfig, 0, sizeof(NV_ENC_CONFIG));
+
+    ctx->outputSurfaceQueue = 0;
+    ctx->outputSurfaceReadyQueue = 0;
+    ctx->timestampList = 0;
+    ctx->lastDts = AV_NOPTS_VALUE;
+    ctx->nvencoder = 0;
+    ctx->cuContext = 0;
+    ctx->inputSurfaces = 0;
+    ctx->outputSurfaces = 0;
+    ctx->maxSurfaceCount = 0;
+
+    avctx->coded_frame = av_frame_alloc();
+    if (!avctx->coded_frame)
+    {
+        res = AVERROR(ENOMEM);
+        goto error;
+    }
+
+    ctx->encodeConfig.version = NV_ENC_CONFIG_VER;
+    ctx->initEncodeParams.version = NV_ENC_INITIALIZE_PARAMS_VER;
+    presetConfig.version = NV_ENC_PRESET_CONFIG_VER;
+    presetConfig.presetCfg.version = NV_ENC_CONFIG_VER;
+    stEncodeSessionParams.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
+    stEncodeSessionParams.apiVersion = NVENCAPI_VERSION;
+    stEncodeSessionParams.clientKeyPtr = &license;
+
+    /* The context is created and immediately popped again, so it does not
+     * stay current on the calling thread. */
+    if (cuCtxCreate(&ctx->cuContext, 0, pNvencDevices[iNvencUseDeviceID]) != CUDA_SUCCESS
+        || cuCtxPopCurrent(&cuContextCurr) != CUDA_SUCCESS)
+    {
+        av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC\n");
+        goto error;
+    }
+
+    stEncodeSessionParams.device = (void*)ctx->cuContext;
+    stEncodeSessionParams.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
+
+    nvStatus = pNvEnc->nvEncOpenEncodeSessionEx(&stEncodeSessionParams, &ctx->nvencoder);
+    if (nvStatus != NV_ENC_SUCCESS)
+    {
+        ctx->nvencoder = 0;
+        av_log(avctx, AV_LOG_FATAL, "OpenEncodeSessionEx failed: 0x%x - invalid license key?\n", (int)nvStatus);
+        goto error;
+    }
+
+    /* Map the preset option onto an NVENC preset GUID; an unknown name is
+     * reported and the HQ default stays in effect. */
+    if (ctx->preset)
+    {
+        if (strcmp(ctx->preset, "hp") == 0)
+        {
+            encoderPreset = NV_ENC_PRESET_HP_GUID;
+        }
+        else if (strcmp(ctx->preset, "hq") == 0)
+        {
+            encoderPreset = NV_ENC_PRESET_HQ_GUID;
+        }
+        else if (strcmp(ctx->preset, "bd") == 0)
+        {
+            encoderPreset = NV_ENC_PRESET_BD_GUID;
+        }
+        else if (strcmp(ctx->preset, "ll") == 0)
+        {
+            encoderPreset = NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID;
+            isLL = 1;
+        }
+        else if (strcmp(ctx->preset, "llhp") == 0)
+        {
+            encoderPreset = NV_ENC_PRESET_LOW_LATENCY_HP_GUID;
+            isLL = 1;
+        }
+        else if (strcmp(ctx->preset, "llhq") == 0)
+        {
+            encoderPreset = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;
+            isLL = 1;
+        }
+        else if (strcmp(ctx->preset, "default") == 0)
+        {
+            encoderPreset = NV_ENC_PRESET_DEFAULT_GUID;
+        }
+        else
+        {
+            av_log(avctx, AV_LOG_ERROR, "Preset \"%s\" is unknown!\n", ctx->preset);
+        }
+    }
+
+    nvStatus = pNvEnc->nvEncGetEncodePresetConfig(ctx->nvencoder, NV_ENC_CODEC_H264_GUID, encoderPreset, &presetConfig);
+    if (nvStatus != NV_ENC_SUCCESS)
+    {
+        av_log(avctx, AV_LOG_FATAL, "GetEncodePresetConfig failed: 0x%x\n", (int)nvStatus);
+        goto error;
+    }
+
+    ctx->initEncodeParams.encodeGUID = NV_ENC_CODEC_H264_GUID;
+    ctx->initEncodeParams.encodeHeight = avctx->height;
+    ctx->initEncodeParams.encodeWidth = avctx->width;
+    ctx->initEncodeParams.darHeight = avctx->height;
+    ctx->initEncodeParams.darWidth = avctx->width;
+    ctx->initEncodeParams.frameRateNum = avctx->time_base.den;
+    ctx->initEncodeParams.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;
+
+    /* Fewer surfaces are used for large frames (>= 8160 macroblocks). */
+    numMBs = ((avctx->width + 15) / 16) * ((avctx->height + 15) / 16);
+    ctx->maxSurfaceCount = (numMBs >= 8160) ? 16 : 32;
+
+    ctx->initEncodeParams.enableEncodeAsync = 0;
+    ctx->initEncodeParams.enablePTD = 1;
+
+    ctx->initEncodeParams.presetGUID = encoderPreset;
+
+    /* Start from the preset configuration and override individual fields. */
+    ctx->initEncodeParams.encodeConfig = &ctx->encodeConfig;
+    memcpy(&ctx->encodeConfig, &presetConfig.presetCfg, sizeof(NV_ENC_CONFIG));
+    ctx->encodeConfig.version = NV_ENC_CONFIG_VER;
+
+    if (avctx->gop_size >= 0)
+    {
+        ctx->encodeConfig.gopLength = avctx->gop_size;
+        ctx->encodeConfig.encodeCodecConfig.h264Config.idrPeriod = avctx->gop_size;
+    }
+
+    if (avctx->bit_rate > 0)
+        ctx->encodeConfig.rcParams.averageBitRate = avctx->bit_rate;
+
+    if (avctx->rc_max_rate > 0)
+        ctx->encodeConfig.rcParams.maxBitRate = avctx->rc_max_rate;
+
+    /* Rate control: cbr takes precedence over cqp, which takes precedence
+     * over a qmin/qmax constrained VBR. */
+    if (ctx->cbr)
+    {
+        if (ctx->twopass == 0)
+        {
+            ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
+        }
+        else if (ctx->twopass == 1 || isLL)
+        {
+            ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_QUALITY;
+
+            ctx->encodeConfig.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
+            ctx->encodeConfig.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE;
+        }
+        else
+        {
+            ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
+        }
+    }
+    else if (ctx->cqp >= 0)
+    {
+        ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
+        ctx->encodeConfig.rcParams.constQP.qpInterB = ctx->cqp;
+        ctx->encodeConfig.rcParams.constQP.qpInterP = ctx->cqp;
+        ctx->encodeConfig.rcParams.constQP.qpIntra = ctx->cqp;
+
+        avctx->qmin = -1;
+        avctx->qmax = -1;
+    }
+    else if (avctx->qmin >= 0 && avctx->qmax >= 0)
+    {
+        ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
+
+        ctx->encodeConfig.rcParams.enableMinQP = 1;
+        ctx->encodeConfig.rcParams.enableMaxQP = 1;
+
+        ctx->encodeConfig.rcParams.minQP.qpInterB = avctx->qmin;
+        ctx->encodeConfig.rcParams.minQP.qpInterP = avctx->qmin;
+        ctx->encodeConfig.rcParams.minQP.qpIntra = avctx->qmin;
+
+        ctx->encodeConfig.rcParams.maxQP.qpInterB = avctx->qmax;
+        ctx->encodeConfig.rcParams.maxQP.qpInterP = avctx->qmax;
+        ctx->encodeConfig.rcParams.maxQP.qpIntra = avctx->qmax;
+    }
+
+    if (avctx->rc_buffer_size > 0)
+        ctx->encodeConfig.rcParams.vbvBufferSize = avctx->rc_buffer_size;
+
+    if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
+        ctx->encodeConfig.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
+    else
+        ctx->encodeConfig.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
+
+    /* Without an explicit profile option fall back to avctx->profile. */
+    if (!ctx->profile)
+    {
+        switch (avctx->profile)
+        {
+        case FF_PROFILE_H264_BASELINE:
+            ctx->profile = av_strdup("baseline");
+            break;
+        case FF_PROFILE_H264_MAIN:
+            ctx->profile = av_strdup("main");
+            break;
+        default:
+            ctx->profile = av_strdup("high");
+            break;
+        }
+
+        /* av_strdup can fail; strcmp below must not see a null pointer. */
+        if (!ctx->profile)
+        {
+            res = AVERROR(ENOMEM);
+            goto error;
+        }
+    }
+
+    ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
+
+    if (strcmp(ctx->profile, "high") == 0)
+    {
+        ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
+    }
+    else if (strcmp(ctx->profile, "main") == 0)
+    {
+        ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
+    }
+    else if (strcmp(ctx->profile, "baseline") == 0)
+    {
+        ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
+    }
+    else
+    {
+        av_log(avctx, AV_LOG_WARNING, "Unknown profile requested: %s\n", ctx->profile);
+    }
+
+    if (ctx->gobpattern >= 0)
+    {
+        ctx->encodeConfig.frameIntervalP = 1;
+    }
+
+    ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag = 1;
+    ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag = 1;
+
+    ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.colourMatrix = avctx->colorspace;
+    ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.colourPrimaries = avctx->color_primaries;
+    ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics = avctx->color_trc;
+
+    ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag = avctx->color_range == AVCOL_RANGE_JPEG;
+
+    /* With global headers the SPS/PPS go into extradata instead of the stream. */
+    ctx->encodeConfig.encodeCodecConfig.h264Config.disableSPSPPS = (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
+
+    nvStatus = pNvEnc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->initEncodeParams);
+    if (nvStatus != NV_ENC_SUCCESS)
+    {
+        av_log(avctx, AV_LOG_FATAL, "InitializeEncoder failed: 0x%x\n", (int)nvStatus);
+        goto error;
+    }
+
+    ctx->inputSurfaces = (NvencInputSurface*)calloc(ctx->maxSurfaceCount, sizeof(NvencInputSurface));
+    ctx->outputSurfaces = (NvencOutputSurface*)calloc(ctx->maxSurfaceCount, sizeof(NvencOutputSurface));
+    if (!ctx->inputSurfaces || !ctx->outputSurfaces)
+    {
+        res = AVERROR(ENOMEM);
+        goto error;
+    }
+
+    /* surfaceCount always equals the number of input buffers that exist, so
+     * the error path knows how many of them to destroy. */
+    for (surfaceCount = 0; surfaceCount < ctx->maxSurfaceCount; ++surfaceCount)
+    {
+        NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 };
+        NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
+        allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
+        allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
+
+        allocSurf.width = (avctx->width + 31) & ~31;
+        allocSurf.height = (avctx->height + 31) & ~31;
+
+        allocSurf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
+
+        switch (avctx->pix_fmt)
+        {
+        case AV_PIX_FMT_YUV420P:
+            allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YV12_PL;
+            break;
+        case AV_PIX_FMT_NV12:
+            allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_NV12_PL;
+            break;
+        case AV_PIX_FMT_YUV444P:
+            allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YUV444_PL;
+            break;
+        default:
+            av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format\n");
+            goto error;
+        }
+
+        nvStatus = pNvEnc->nvEncCreateInputBuffer(ctx->nvencoder, &allocSurf);
+        if (nvStatus != NV_ENC_SUCCESS)
+        {
+            av_log(avctx, AV_LOG_FATAL, "CreateInputBuffer failed\n");
+            goto error;
+        }
+
+        ctx->inputSurfaces[surfaceCount].lockCount = 0;
+        ctx->inputSurfaces[surfaceCount].inputSurface = allocSurf.inputBuffer;
+        ctx->inputSurfaces[surfaceCount].format = allocSurf.bufferFmt;
+        ctx->inputSurfaces[surfaceCount].width = allocSurf.width;
+        ctx->inputSurfaces[surfaceCount].height = allocSurf.height;
+
+        allocOut.size = 1024 * 1024;
+        allocOut.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
+
+        nvStatus = pNvEnc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut);
+        if (nvStatus != NV_ENC_SUCCESS)
+        {
+            av_log(avctx, AV_LOG_FATAL, "CreateBitstreamBuffer failed\n");
+            /* The input buffer of this slot exists already: count the slot so
+             * it gets destroyed, and mark its output buffer as absent. */
+            ctx->outputSurfaces[surfaceCount++].outputSurface = 0;
+            goto error;
+        }
+
+        ctx->outputSurfaces[surfaceCount].outputSurface = allocOut.bitstreamBuffer;
+        ctx->outputSurfaces[surfaceCount].size = allocOut.size;
+        ctx->outputSurfaces[surfaceCount].busy = 0;
+    }
+
+    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER)
+    {
+        uint32_t outSize = 0;
+        char tmpHeader[256];
+        NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
+        payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
+
+        payload.spsppsBuffer = tmpHeader;
+        payload.inBufferSize = sizeof(tmpHeader);
+        payload.outSPSPPSPayloadSize = &outSize;
+
+        nvStatus = pNvEnc->nvEncGetSequenceParams(ctx->nvencoder, &payload);
+        if (nvStatus != NV_ENC_SUCCESS)
+        {
+            av_log(avctx, AV_LOG_FATAL, "GetSequenceParams failed\n");
+            goto error;
+        }
+
+        /* Never copy more than the temporary buffer can hold. */
+        if (outSize > sizeof(tmpHeader))
+        {
+            av_log(avctx, AV_LOG_FATAL, "GetSequenceParams failed\n");
+            goto error;
+        }
+
+        avctx->extradata = av_mallocz(outSize + FF_INPUT_BUFFER_PADDING_SIZE);
+        if (!avctx->extradata)
+        {
+            avctx->extradata_size = 0;
+            res = AVERROR(ENOMEM);
+            goto error;
+        }
+        avctx->extradata_size = outSize;
+
+        memcpy(avctx->extradata, tmpHeader, outSize);
+    }
+    else
+    {
+        avctx->extradata = 0;
+        avctx->extradata_size = 0;
+    }
+
+    if (ctx->encodeConfig.frameIntervalP > 1)
+        avctx->has_b_frames = 2;
+
+    if (ctx->encodeConfig.rcParams.averageBitRate > 0)
+        avctx->bit_rate = ctx->encodeConfig.rcParams.averageBitRate;
+
+    return 0;
+
+error:
+
+    for (i = 0; i < surfaceCount; ++i)
+    {
+        pNvEnc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->inputSurfaces[i].inputSurface);
+        if (ctx->outputSurfaces[i].outputSurface)
+            pNvEnc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->outputSurfaces[i].outputSurface);
+    }
+
+    /* Drop the surface tables and the count so a later close call cannot
+     * touch buffers that are gone. */
+    free(ctx->inputSurfaces);
+    free(ctx->outputSurfaces);
+    ctx->inputSurfaces = 0;
+    ctx->outputSurfaces = 0;
+    ctx->maxSurfaceCount = 0;
+
+    if (ctx->nvencoder)
+        pNvEnc->nvEncDestroyEncoder(ctx->nvencoder);
+
+    if (ctx->cuContext)
+        cuCtxDestroy(ctx->cuContext);
+
+    nvenc_unload_nvenc(avctx);
+
+    ctx->nvencoder = 0;
+    ctx->cuContext = 0;
+
+    av_frame_free(&avctx->coded_frame);
+
+    return res;
+}
+
+/**
+ * Tear down the encoder: drain the internal lists, destroy all NVENC
+ * buffers, the encode session and the CUDA context, and release the
+ * memory owned by the private context.
+ *
+ * @param avctx codec context; avctx->priv_data is the NvencContext
+ * @return always 0
+ */
+static av_cold int nvenc_encode_close(AVCodecContext *avctx)
+{
+    NvencContext *ctx = avctx->priv_data;
+    int i;
+
+    if (ctx->profile)
+        av_freep(&ctx->profile);
+
+    if (avctx->extradata)
+        av_freep(&avctx->extradata);
+
+    /* Popping frees the list nodes; the returned values are not needed. */
+    while (ctx->timestampList)
+        timestamp_list_get_lowest(&ctx->timestampList);
+
+    while (ctx->outputSurfaceReadyQueue)
+        out_surf_queue_pop(&ctx->outputSurfaceReadyQueue);
+
+    while (ctx->outputSurfaceQueue)
+        out_surf_queue_pop(&ctx->outputSurfaceQueue);
+
+    for (i = 0; i < ctx->maxSurfaceCount; ++i)
+    {
+        pNvEnc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->inputSurfaces[i].inputSurface);
+        pNvEnc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->outputSurfaces[i].outputSurface);
+    }
+    ctx->maxSurfaceCount = 0;
+
+    /* The surface tables were allocated with calloc in nvenc_encode_init and
+     * are owned by this context. */
+    free(ctx->inputSurfaces);
+    free(ctx->outputSurfaces);
+    ctx->inputSurfaces = 0;
+    ctx->outputSurfaces = 0;
+
+    pNvEnc->nvEncDestroyEncoder(ctx->nvencoder);
+    ctx->nvencoder = 0;
+
+    cuCtxDestroy(ctx->cuContext);
+    ctx->cuContext = 0;
+
+    nvenc_unload_nvenc(avctx);
+
+    av_frame_free(&avctx->coded_frame);
+
+    return 0;
+}
+
+/**
+ * Copy the encoded data of one finished output surface into a packet.
+ *
+ * Locks the bitstream buffer, copies its content into pkt, sets the key
+ * frame flag and the picture type, and derives pts/dts. One entry is taken
+ * off the timestamp list on every path, successful or not.
+ *
+ * @param avctx       codec context
+ * @param pkt         packet that receives the bitstream
+ * @param coded_frame unused; the picture type is written to avctx->coded_frame
+ * @param tmpoutsurf  output surface to read from
+ * @return 1 if pkt was filled, 0 if locking the bitstream or allocating the
+ *         packet failed
+ */
+static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, AVFrame *coded_frame, NvencOutputSurface *tmpoutsurf)
+{
+    NvencContext *ctx = avctx->priv_data;
+    /* Scratch array handed to the lock call; it is owned by this function
+     * and released on every return path. */
+    uint32_t *sliceOffsets = (uint32_t*)calloc(ctx->encodeConfig.encodeCodecConfig.h264Config.sliceModeData, sizeof(uint32_t));
+    NV_ENC_LOCK_BITSTREAM lockParams = { 0 };
+    NVENCSTATUS nvStatus;
+
+    lockParams.version = NV_ENC_LOCK_BITSTREAM_VER;
+
+    lockParams.doNotWait = 0;
+    lockParams.outputBitstream = tmpoutsurf->outputSurface;
+    lockParams.sliceOffsets = sliceOffsets;
+
+    nvStatus = pNvEnc->nvEncLockBitstream(ctx->nvencoder, &lockParams);
+    if (nvStatus != NV_ENC_SUCCESS)
+    {
+        av_log(avctx, AV_LOG_ERROR, "Failed locking bitstream buffer\n");
+        timestamp_list_get_lowest(&ctx->timestampList);
+        free(sliceOffsets);
+        return 0;
+    }
+
+    if (ff_alloc_packet2(avctx, pkt, lockParams.bitstreamSizeInBytes) < 0)
+    {
+        pNvEnc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->outputSurface);
+        timestamp_list_get_lowest(&ctx->timestampList);
+        free(sliceOffsets);
+        return 0;
+    }
+
+    memcpy(pkt->data, lockParams.bitstreamBufferPtr, lockParams.bitstreamSizeInBytes);
+
+    nvStatus = pNvEnc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->outputSurface);
+    if (nvStatus != NV_ENC_SUCCESS)
+        av_log(avctx, AV_LOG_ERROR, "Failed unlocking bitstream buffer, expect the gates of mordor to open\n");
+
+    free(sliceOffsets);
+
+    switch (lockParams.pictureType)
+    {
+    case NV_ENC_PIC_TYPE_IDR:
+        pkt->flags |= AV_PKT_FLAG_KEY;
+        /* fallthrough: an IDR picture is also reported as an I picture */
+    case NV_ENC_PIC_TYPE_I:
+        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+        break;
+    case NV_ENC_PIC_TYPE_P:
+        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_P;
+        break;
+    case NV_ENC_PIC_TYPE_B:
+        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_B;
+        break;
+    case NV_ENC_PIC_TYPE_BI:
+        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_BI;
+        break;
+    default:
+        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_NONE;
+        break;
+    }
+
+    /* dts is the lowest pending input pts shifted back by frameIntervalP,
+     * clamped so that it never exceeds pts and always increases. */
+    pkt->pts = lockParams.outputTimeStamp;
+    pkt->dts = timestamp_list_get_lowest(&ctx->timestampList) - ctx->encodeConfig.frameIntervalP;
+
+    if (pkt->dts > pkt->pts)
+        pkt->dts = pkt->pts;
+
+    if (ctx->lastDts != AV_NOPTS_VALUE && pkt->dts <= ctx->lastDts)
+        pkt->dts = ctx->lastDts + 1;
+
+    ctx->lastDts = pkt->dts;
+
+    return 1;
+}
+
+/**
+ * Submit one frame to NVENC (or signal end of stream) and return at most
+ * one finished packet.
+ *
+ * For a non-null frame a free input surface is picked, the picture is copied
+ * into it plane by plane, and it is submitted together with a free output
+ * surface. Output surfaces are parked in outputSurfaceQueue while the encoder
+ * reports NV_ENC_ERR_NEED_MORE_INPUT and moved to outputSurfaceReadyQueue
+ * once it reports success. If a ready surface exists, its bitstream is
+ * turned into pkt.
+ *
+ * @param avctx      codec context; avctx->priv_data is the NvencContext
+ * @param pkt        packet that receives the encoded data
+ * @param frame      frame to encode, or null to flush the encoder
+ * @param got_packet set to the result of process_output_surface when an
+ *                   output surface was processed; otherwise left untouched
+ * @return 0 on success (including several logged-and-dropped conditions),
+ *         AVERROR(EINVAL) for an unsupported pixel format, AVERROR_EXTERNAL
+ *         when NVENC reports an error
+ */
+static int nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+    const AVFrame *frame, int *got_packet)
+{
+    NVENCSTATUS nvStatus;
+    NvencContext *ctx = avctx->priv_data;
+    NvencOutputSurface *tmpoutsurf;
+    int i = 0;
+
+    NV_ENC_PIC_PARAMS picParams = { 0 };
+    picParams.version = NV_ENC_PIC_PARAMS_VER;
+
+    if (frame)
+    {
+        NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 };
+        NvencInputSurface *inSurf = 0;
+
+        for (i = 0; i < ctx->maxSurfaceCount; ++i)
+        {
+            if (ctx->inputSurfaces[i].lockCount == 0)
+            {
+                inSurf = &ctx->inputSurfaces[i];
+                break;
+            }
+        }
+
+        if (!inSurf)
+        {
+            av_log(avctx, AV_LOG_ERROR, "No free input surface found!\n");
+            return AVERROR_EXTERNAL;
+        }
+
+        /* Reserve the surface; every failure path before the picture is
+         * submitted has to give it back by resetting lockCount. */
+        inSurf->lockCount = 1;
+
+        lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
+        lockBufferParams.inputBuffer = inSurf->inputSurface;
+
+        nvStatus = pNvEnc->nvEncLockInputBuffer(ctx->nvencoder, &lockBufferParams);
+        if (nvStatus != NV_ENC_SUCCESS)
+        {
+            inSurf->lockCount = 0;
+            av_log(avctx, AV_LOG_ERROR, "Failed locking nvenc input buffer\n");
+            return 0;
+        }
+
+        if (avctx->pix_fmt == AV_PIX_FMT_YUV420P)
+        {
+            uint8_t *buf = lockBufferParams.bufferDataPtr;
+
+            av_image_copy_plane(buf, lockBufferParams.pitch,
+                frame->data[0], frame->linesize[0],
+                avctx->width, avctx->height);
+
+            buf += inSurf->height * lockBufferParams.pitch;
+
+            /* The surface was created as YV12, so the V plane (data[2]) is
+             * written before the U plane (data[1]). */
+            av_image_copy_plane(buf, lockBufferParams.pitch / 2,
+                frame->data[2], frame->linesize[2],
+                avctx->width / 2, avctx->height / 2);
+
+            buf += (inSurf->height * lockBufferParams.pitch) / 4;
+
+            av_image_copy_plane(buf, lockBufferParams.pitch / 2,
+                frame->data[1], frame->linesize[1],
+                avctx->width / 2, avctx->height / 2);
+        }
+        else if (avctx->pix_fmt == AV_PIX_FMT_NV12)
+        {
+            uint8_t *buf = lockBufferParams.bufferDataPtr;
+
+            av_image_copy_plane(buf, lockBufferParams.pitch,
+                frame->data[0], frame->linesize[0],
+                avctx->width, avctx->height);
+
+            buf += inSurf->height * lockBufferParams.pitch;
+
+            av_image_copy_plane(buf, lockBufferParams.pitch,
+                frame->data[1], frame->linesize[1],
+                avctx->width, avctx->height / 2);
+        }
+        else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P)
+        {
+            uint8_t *buf = lockBufferParams.bufferDataPtr;
+
+            av_image_copy_plane(buf, lockBufferParams.pitch,
+                frame->data[0], frame->linesize[0],
+                avctx->width, avctx->height);
+
+            buf += inSurf->height * lockBufferParams.pitch;
+
+            av_image_copy_plane(buf, lockBufferParams.pitch,
+                frame->data[1], frame->linesize[1],
+                avctx->width, avctx->height);
+
+            buf += inSurf->height * lockBufferParams.pitch;
+
+            av_image_copy_plane(buf, lockBufferParams.pitch,
+                frame->data[2], frame->linesize[2],
+                avctx->width, avctx->height);
+        }
+        else
+        {
+            av_log(avctx, AV_LOG_FATAL, "Invalid pixel format!\n");
+            /* Do not leave the buffer locked or the surface reserved. */
+            pNvEnc->nvEncUnlockInputBuffer(ctx->nvencoder, inSurf->inputSurface);
+            inSurf->lockCount = 0;
+            return AVERROR(EINVAL);
+        }
+
+        nvStatus = pNvEnc->nvEncUnlockInputBuffer(ctx->nvencoder, inSurf->inputSurface);
+        if (nvStatus != NV_ENC_SUCCESS)
+        {
+            inSurf->lockCount = 0;
+            av_log(avctx, AV_LOG_FATAL, "Failed unlocking input buffer!\n");
+            return AVERROR_EXTERNAL;
+        }
+
+        /* From here on i is the index of the output surface for this frame. */
+        for (i = 0; i < ctx->maxSurfaceCount; ++i)
+            if (!ctx->outputSurfaces[i].busy)
+                break;
+
+        if (i == ctx->maxSurfaceCount)
+        {
+            inSurf->lockCount = 0;
+            av_log(avctx, AV_LOG_ERROR, "No free output surface found!\n");
+            return 0;
+        }
+
+        ctx->outputSurfaces[i].inputSurface = inSurf;
+
+        picParams.inputBuffer = inSurf->inputSurface;
+        picParams.bufferFmt = inSurf->format;
+        picParams.inputWidth = avctx->width;
+        picParams.inputHeight = avctx->height;
+        picParams.outputBitstream = ctx->outputSurfaces[i].outputSurface;
+        picParams.completionEvent = 0;
+
+        if (avctx->flags & CODEC_FLAG_INTERLACED_DCT)
+        {
+            if (frame->top_field_first)
+                picParams.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
+            else
+                picParams.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
+        }
+        else
+        {
+            picParams.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
+        }
+
+        picParams.encodePicFlags = 0;
+        picParams.inputTimeStamp = frame->pts;
+        picParams.inputDuration = 0;
+        picParams.codecPicParams.h264PicParams.sliceMode = ctx->encodeConfig.encodeCodecConfig.h264Config.sliceMode;
+        picParams.codecPicParams.h264PicParams.sliceModeData = ctx->encodeConfig.encodeCodecConfig.h264Config.sliceModeData;
+        memcpy(&picParams.rcParams, &ctx->encodeConfig.rcParams, sizeof(NV_ENC_RC_PARAMS));
+
+        timestamp_list_insert_sorted(&ctx->timestampList, frame->pts);
+    }
+    else
+    {
+        picParams.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
+    }
+
+    nvStatus = pNvEnc->nvEncEncodePicture(ctx->nvencoder, &picParams);
+
+    /* The encoder wants more input before it produces output: park the
+     * output surface until a later call succeeds. */
+    if (frame && nvStatus == NV_ENC_ERR_NEED_MORE_INPUT)
+    {
+        out_surf_queue_push(&ctx->outputSurfaceQueue, &ctx->outputSurfaces[i]);
+        ctx->outputSurfaces[i].busy = 1;
+    }
+
+    if (nvStatus != NV_ENC_SUCCESS && nvStatus != NV_ENC_ERR_NEED_MORE_INPUT)
+    {
+        /* NOTE(review): the input surface and the timestamp queued for this
+         * frame stay reserved on this path - confirm whether NVENC may still
+         * reference the buffer before releasing them here. */
+        av_log(avctx, AV_LOG_ERROR, "EncodePicture failed!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    /* Success: everything parked so far is ready, followed by this frame. */
+    if (nvStatus != NV_ENC_ERR_NEED_MORE_INPUT)
+    {
+        while (ctx->outputSurfaceQueue)
+        {
+            tmpoutsurf = out_surf_queue_pop(&ctx->outputSurfaceQueue);
+            out_surf_queue_push(&ctx->outputSurfaceReadyQueue, tmpoutsurf);
+        }
+
+        if (frame)
+        {
+            out_surf_queue_push(&ctx->outputSurfaceReadyQueue, &ctx->outputSurfaces[i]);
+            ctx->outputSurfaces[i].busy = 1;
+        }
+    }
+
+    if (ctx->outputSurfaceReadyQueue)
+    {
+        tmpoutsurf = out_surf_queue_pop(&ctx->outputSurfaceReadyQueue);
+
+        *got_packet = process_output_surface(avctx, pkt, avctx->coded_frame, tmpoutsurf);
+
+        /* Both surfaces of the pair become available again. */
+        tmpoutsurf->busy = 0;
+        assert(tmpoutsurf->inputSurface->lockCount);
+        tmpoutsurf->inputSurface->lockCount -= 1;
+    }
+
+    return 0;
+}
+
+static int pix_fmts_nvenc_initialized = 0; /* set to 1 once nvenc_init_static() has run, whether or not the probe succeeded */
+
+static enum AVPixelFormat pix_fmts_nvenc[] = { /* AV_PIX_FMT_NONE-terminated list handed out as codec->pix_fmts */
+    AV_PIX_FMT_NV12, /* default entry; nvenc_init_static() may overwrite slots 0..2 with the probed formats */
+    AV_PIX_FMT_NONE,
+    AV_PIX_FMT_NONE,
+    AV_PIX_FMT_NONE /* room for up to three formats plus the terminator */
+};
+
+/* One-time probe: opens a throwaway NVENC session to query the supported input formats and publishes them through codec->pix_fmts (NV12 only when the probe fails). */
+static av_cold void nvenc_init_static(AVCodec *codec)
+{
+    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS stEncodeSessionParams = { 0 };
+    CUcontext cuctxcur = 0, cuctx = 0;
+    NVENCSTATUS nvStatus;
+    void *nvencoder = 0;
+    GUID encodeGuid = NV_ENC_CODEC_H264_GUID;
+    GUID license = dummy_license;
+    int i = 0, pos = 0;
+    int gotnv12 = 0, got420 = 0, got444 = 0;
+
+#ifndef _WIN32
+    const char *tmpStr = 0;
+#endif
+
+    uint32_t inputFmtCount = 32;
+    NV_ENC_BUFFER_FORMAT inputFmts[32];
+    for (i = 0; i < 32; ++i)
+        inputFmts[i] = (NV_ENC_BUFFER_FORMAT)0;
+
+    if (pix_fmts_nvenc_initialized)
+    {
+        codec->pix_fmts = pix_fmts_nvenc;
+        return;
+    }
+
+    if (!nvenc_dyload_nvenc(0))
+    {
+        /* Publish the static NV12 default here as well, so a first failing call behaves like later calls and like the error path. */
+        pix_fmts_nvenc_initialized = 1;
+        codec->pix_fmts = pix_fmts_nvenc;
+        return;
+    }
+
+#ifndef _WIN32
+    if ((tmpStr = getenv("NVENC_KEY")) && !nvenc_string_to_guid(tmpStr, &license))
+        license = dummy_license; /* a failed parse may have written part of the GUID */
+#endif
+
+    stEncodeSessionParams.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
+    stEncodeSessionParams.apiVersion = NVENCAPI_VERSION;
+    stEncodeSessionParams.clientKeyPtr = &license;
+
+    if (cuCtxCreate(&cuctx, 0, pNvencDevices[iNvencUseDeviceID]) != CUDA_SUCCESS)
+    {
+        cuctx = 0;
+        goto error;
+    }
+
+    if (cuCtxPopCurrent(&cuctxcur) != CUDA_SUCCESS)
+        goto error;
+
+    stEncodeSessionParams.device = (void*)cuctx;
+    stEncodeSessionParams.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
+
+    nvStatus = pNvEnc->nvEncOpenEncodeSessionEx(&stEncodeSessionParams, &nvencoder);
+    if (nvStatus != NV_ENC_SUCCESS)
+    {
+        nvencoder = 0;
+        goto error;
+    }
+
+    nvStatus = pNvEnc->nvEncGetInputFormats(nvencoder, encodeGuid, inputFmts, 32, &inputFmtCount);
+    if (nvStatus != NV_ENC_SUCCESS)
+        goto error;
+
+    for (i = 0; i < inputFmtCount && pos < 3; ++i) /* at most one slot per pixel-format family */
+    {
+        if (!gotnv12 && (inputFmts[i] == NV_ENC_BUFFER_FORMAT_NV12_PL
+            || inputFmts[i] == NV_ENC_BUFFER_FORMAT_NV12_TILED16x16
+            || inputFmts[i] == NV_ENC_BUFFER_FORMAT_NV12_TILED64x16))
+        {
+            pix_fmts_nvenc[pos++] = AV_PIX_FMT_NV12;
+            gotnv12 = 1;
+        }
+        else if (!got420 && (inputFmts[i] == NV_ENC_BUFFER_FORMAT_YV12_PL
+            || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YV12_TILED16x16
+            || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YV12_TILED64x16))
+        {
+            pix_fmts_nvenc[pos++] = AV_PIX_FMT_YUV420P;
+            got420 = 1;
+        }
+        else if (!got444 && (inputFmts[i] == NV_ENC_BUFFER_FORMAT_YUV444_PL
+            || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YUV444_TILED16x16
+            || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YUV444_TILED64x16))
+        {
+            pix_fmts_nvenc[pos++] = AV_PIX_FMT_YUV444P;
+            got444 = 1;
+        }
+    }
+
+    pix_fmts_nvenc[pos] = AV_PIX_FMT_NONE;
+
+    pix_fmts_nvenc_initialized = 1;
+    codec->pix_fmts = pix_fmts_nvenc;
+
+    pNvEnc->nvEncDestroyEncoder(nvencoder);
+    cuCtxDestroy(cuctx);
+
+    nvenc_unload_nvenc(0);
+
+    return;
+
+error:
+
+    if (nvencoder)
+        pNvEnc->nvEncDestroyEncoder(nvencoder);
+
+    if (cuctx)
+        cuCtxDestroy(cuctx);
+
+    pix_fmts_nvenc_initialized = 1;
+    pix_fmts_nvenc[0] = AV_PIX_FMT_NV12;
+    pix_fmts_nvenc[1] = AV_PIX_FMT_NONE;
+
+    codec->pix_fmts = pix_fmts_nvenc;
+
+    nvenc_unload_nvenc(0);
+}
+
+#define OFFSET(x) offsetof(NvencContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { "profile", "Set profile restrictions", OFFSET(profile), AV_OPT_TYPE_STRING, { .str = "high" }, 0, 0, VE},
+    { "preset", "Set the encoding preset (one of hq, hp, bd, ll, llhq, llhp, default)", OFFSET(preset), AV_OPT_TYPE_STRING, { .str = "hq" }, 0, 0, VE },
+    { "cqp", "Constant quantization parameter rate control method", OFFSET(cqp), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE },
+    { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
+    { "2pass", "Use 2pass cbr encoding mode (low latency mode only)", OFFSET(twopass), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE },
+    { "goppattern", "Specifies the GOP pattern as follows: 0: I, 1: IPP, 2: IBP, 3: IBBP", OFFSET(gobpattern), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 3, VE },
+    { NULL }
+};
+
+static const AVClass nvenc_class = {
+    .class_name = "nvenc",
+    .item_name = av_default_item_name,
+    .option = options,
+    .version = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVCodecDefault nvenc_defaults[] = {
+    { "b", "0" },
+    { "qmin", "-1" },
+    { "qmax", "-1" },
+    { "qdiff", "-1" },
+    { "qblur", "-1" },
+    { "qcomp", "-1" },
+    { NULL },
+};
+
+AVCodec ff_nvenc_encoder = { /* codec registration entry for the NVENC H.264 encoder */
+    .name = "nvenc",
+    .long_name = NULL_IF_CONFIG_SMALL("Nvidia NVENC h264 encoder"),
+    .type = AVMEDIA_TYPE_VIDEO,
+    .id = AV_CODEC_ID_H264,
+    .priv_data_size = sizeof(NvencContext), /* private context; the OFFSET() option offsets index into it */
+    .init = nvenc_encode_init,
+    .encode2 = nvenc_encode_frame,
+    .close = nvenc_encode_close,
+    .capabilities = CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL,
+    .priv_class = &nvenc_class, /* exposes the private options table */
+    .defaults = nvenc_defaults,
+    .init_static_data = nvenc_init_static /* sets pix_fmts */
+};
diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h
new file mode 100644
index 0000000..0b9ac21
--- /dev/null
+++ b/libavcodec/nvenc.h
@@ -0,0 +1,25 @@
+/*
+ * H.264 hardware encoding using nvidia nvenc
+ * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_NVENC_H
+#define AVCODEC_NVENC_H
+
+#endif
diff --git a/libavcodec/nvenc_api.c b/libavcodec/nvenc_api.c
new file mode 100644
index 0000000..f5093ac
--- /dev/null
+++ b/libavcodec/nvenc_api.c
@@ -0,0 +1,330 @@
+/*
+ * H.264 hardware encoding using nvidia nvenc
+ * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <dlfcn.h>
+#endif
+
+#include "avcodec.h"
+#include "internal.h"
+
+#include "nvenc_cuda.h"
+#include "nvenc_api.h"
+
+PCUINIT cuInit = 0;
+PCUDEVICEGETCOUNT cuDeviceGetCount = 0;
+PCUDEVICEGET cuDeviceGet = 0;
+PCUDEVICEGETNAME cuDeviceGetName = 0;
+PCUDEVICECOMPUTECAPABILITY cuDeviceComputeCapability = 0;
+PCUCTXCREATE cuCtxCreate = 0;
+PCUCTXPOPCURRENT cuCtxPopCurrent = 0;
+PCUCTXDESTROY cuCtxDestroy = 0;
+
+static int nvenc_init_count = 0;
+static NV_ENCODE_API_FUNCTION_LIST nvEncFuncs;
+NV_ENCODE_API_FUNCTION_LIST *pNvEnc = 0;
+int iNvencDeviceCount = 0;
+CUdevice pNvencDevices[16];
+unsigned int iNvencUseDeviceID = 0;
+
+#ifdef _WIN32
+#define LOAD_FUNC(l, s) GetProcAddress(l, s)
+#define DL_CLOSE_FUNC(l) FreeLibrary(l)
+static HMODULE cudaLib = NULL;
+static HMODULE nvEncLib = NULL;
+#else
+#define LOAD_FUNC(l, s) dlsym(l, s)
+#define DL_CLOSE_FUNC(l) dlclose(l)
+static void *cudaLib = 0;
+static void *nvEncLib = 0;
+#endif
+
+#define ifav_log(...) do { if(avctx) { av_log(__VA_ARGS__); } } while (0) /* log only when a context named avctx is non-NULL; do/while keeps a following else from binding to the inner if */
+
+#define CHECK_LOAD_FUNC(t, f, s) \
+do { /* resolve symbol s from cudaLib into f (cast to t); jump to the caller's error label when it is missing */ \
+    (f) = (t)LOAD_FUNC(cudaLib, s); \
+    if ((f) == 0) \
+    { \
+        ifav_log(avctx, AV_LOG_FATAL, "Failed loading %s from CUDA library\n", s); \
+        goto error; \
+    } \
+} while (0)
+
+/* Load the CUDA driver library once and resolve the entry points used by this file; returns 1 on success, 0 on failure (avctx may be NULL to suppress logging). */
+av_cold int nvenc_dyload_cuda(AVCodecContext *avctx)
+{
+    if(cudaLib)
+        return 1;
+
+#if defined(_WIN32)
+    cudaLib = LoadLibrary(TEXT("nvcuda.dll"));
+#elif defined(__CYGWIN__)
+    cudaLib = dlopen("nvcuda.dll", RTLD_LAZY);
+#else
+    cudaLib = dlopen("libcuda.so.1", RTLD_LAZY); /* versioned runtime name; the bare .so is usually a dev-package symlink */
+    if(!cudaLib) cudaLib = dlopen("libcuda.so", RTLD_LAZY); /* keep accepting the name tried originally */
+#endif
+
+    if(!cudaLib)
+    {
+        ifav_log(avctx, AV_LOG_FATAL, "Failed loading CUDA library\n");
+        goto error;
+    }
+
+    CHECK_LOAD_FUNC(PCUINIT, cuInit, "cuInit");
+    CHECK_LOAD_FUNC(PCUDEVICEGETCOUNT, cuDeviceGetCount, "cuDeviceGetCount");
+    CHECK_LOAD_FUNC(PCUDEVICEGET, cuDeviceGet, "cuDeviceGet");
+    CHECK_LOAD_FUNC(PCUDEVICEGETNAME, cuDeviceGetName, "cuDeviceGetName");
+    CHECK_LOAD_FUNC(PCUDEVICECOMPUTECAPABILITY, cuDeviceComputeCapability, "cuDeviceComputeCapability");
+    CHECK_LOAD_FUNC(PCUCTXCREATE, cuCtxCreate, "cuCtxCreate_v2");
+    CHECK_LOAD_FUNC(PCUCTXPOPCURRENT, cuCtxPopCurrent, "cuCtxPopCurrent_v2");
+    CHECK_LOAD_FUNC(PCUCTXDESTROY, cuCtxDestroy, "cuCtxDestroy_v2");
+
+    return 1;
+
+error:
+    if(cudaLib)
+        DL_CLOSE_FUNC(cudaLib);
+    cudaLib = 0;
+    return 0;
+}
+
+/* Log a failed CUDA driver call (func is the call's source text); returns 1 for CUDA_SUCCESS, 0 otherwise. */
+static int _checkCudaErrors(AVCodecContext *avctx, CUresult err, const char *func)
+{
+    if(err == CUDA_SUCCESS)
+        return 1;
+    /* CUresult is an enum whose underlying type is implementation-defined; cast so the argument matches %x */
+    ifav_log(avctx, AV_LOG_FATAL, ">> %s - failed with error code 0x%x\n", func, (unsigned int)err);
+    return 0;
+}
+#define checkCudaErrors(f) do { if(!_checkCudaErrors(avctx, (f), #f)) goto error; } while (0) /* jump to the caller's error label on any CUDA failure; do/while makes it a single statement */
+
+/* Enumerate CUDA devices and record those with compute capability >= 3.0 in pNvencDevices; returns 1 if at least one was found, 0 otherwise. */
+av_cold int nvenc_check_cuda(AVCodecContext *avctx)
+{
+    int deviceCount = 0;
+    CUdevice cuDevice = 0;
+    char gpu_name[128];
+    int SMminor = 0, SMmajor = 0;
+    int i, smver;
+
+    if(!nvenc_dyload_cuda(avctx))
+        return 0;
+
+    if(iNvencDeviceCount > 0) /* device list already populated by an earlier call */
+        return 1;
+
+    checkCudaErrors(cuInit(0));
+    checkCudaErrors(cuDeviceGetCount(&deviceCount));
+
+    if(deviceCount == 0)
+    {
+        ifav_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
+        goto error;
+    }
+
+    ifav_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", deviceCount);
+
+    iNvencDeviceCount = 0;
+
+    for(i = 0; i < deviceCount; ++i)
+    {
+        checkCudaErrors(cuDeviceGet(&cuDevice, i));
+        checkCudaErrors(cuDeviceGetName(gpu_name, 128, cuDevice));
+        checkCudaErrors(cuDeviceComputeCapability(&SMmajor, &SMminor, cuDevice));
+
+        smver = (SMmajor << 4) | SMminor; /* pack major.minor so that 3.0 compares as 0x30 */
+
+        ifav_log(avctx, AV_LOG_VERBOSE, "[ GPU #%d - < %s > has Compute SM %d.%d, NVENC %s ]\n", i, gpu_name, SMmajor, SMminor, (smver >= 0x30) ? "Available" : "Not Available");
+
+        if(smver >= 0x30 && iNvencDeviceCount < (int)(sizeof(pNvencDevices) / sizeof(pNvencDevices[0]))) /* never write past the fixed-size device table */
+            pNvencDevices[iNvencDeviceCount++] = cuDevice;
+    }
+
+    if(iNvencDeviceCount == 0)
+    {
+        ifav_log(avctx, AV_LOG_FATAL, "No NVENC capable devices found\n");
+        goto error;
+    }
+
+    return 1;
+
+error:
+
+    iNvencDeviceCount = 0;
+
+    return 0;
+}
+
+/* Take a reference on the NVENC API: on first use load the library and fill the function list behind pNvEnc; returns 1 on success, 0 on failure. */
+av_cold int nvenc_dyload_nvenc(AVCodecContext *avctx)
+{
+    PNVENCODEAPICREATEINSTANCE nvEncodeAPICreateInstance = 0;
+    NVENCSTATUS nvstatus;
+
+    if(!nvenc_check_cuda(avctx))
+        return 0;
+
+    if(pNvEnc != 0) /* already loaded: only bump the reference count */
+    {
+        nvenc_init_count += 1;
+        return 1;
+    }
+
+#if defined(_WIN32)
+    if(sizeof(void*) == 8)
+        nvEncLib = LoadLibrary(TEXT("nvEncodeAPI64.dll"));
+    else
+        nvEncLib = LoadLibrary(TEXT("nvEncodeAPI.dll"));
+#elif defined(__CYGWIN__)
+    if(sizeof(void*) == 8)
+        nvEncLib = dlopen("nvEncodeAPI64.dll", RTLD_LAZY);
+    else
+        nvEncLib = dlopen("nvEncodeAPI.dll", RTLD_LAZY);
+#else
+    nvEncLib = dlopen("libnvidia-encode.so.1", RTLD_LAZY); /* versioned runtime name; the bare .so is usually a dev-package symlink */
+    if(!nvEncLib) nvEncLib = dlopen("libnvidia-encode.so", RTLD_LAZY); /* keep accepting the name tried originally */
+#endif
+
+    if(!nvEncLib)
+    {
+        ifav_log(avctx, AV_LOG_FATAL, "Failed loading the nvenc library\n");
+        goto error;
+    }
+
+    nvEncodeAPICreateInstance = (PNVENCODEAPICREATEINSTANCE)LOAD_FUNC(nvEncLib, "NvEncodeAPICreateInstance");
+
+    if(!nvEncodeAPICreateInstance)
+    {
+        ifav_log(avctx, AV_LOG_FATAL, "Failed to load nvenc entrypoint\n");
+        goto error;
+    }
+
+    pNvEnc = &nvEncFuncs;
+    memset(pNvEnc, 0, sizeof(NV_ENCODE_API_FUNCTION_LIST));
+    pNvEnc->version = NV_ENCODE_API_FUNCTION_LIST_VER;
+
+    nvstatus = nvEncodeAPICreateInstance(pNvEnc);
+
+    if(nvstatus != NV_ENC_SUCCESS)
+    {
+        ifav_log(avctx, AV_LOG_FATAL, "Failed to create nvenc instance\n");
+        goto error;
+    }
+
+    ifav_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n");
+
+    nvenc_init_count = 1;
+
+    return 1;
+
+error:
+    if(nvEncLib)
+        DL_CLOSE_FUNC(nvEncLib);
+    nvEncLib = 0;
+
+    pNvEnc = 0;
+    nvenc_init_count = 0;
+
+    return 0;
+}
+
+/* Drop one reference taken by nvenc_dyload_nvenc(). When the last reference goes away,
+ * close the NVENC and CUDA libraries and reset all state derived from them. */
+av_cold void nvenc_unload_nvenc(AVCodecContext *avctx)
+{
+    /* Nothing is loaded, or other users still hold a reference. */
+    if(nvenc_init_count <= 0 || --nvenc_init_count > 0)
+        return;
+
+    /* Last reference dropped: tear down the NVENC library state. */
+    pNvEnc = 0;
+    DL_CLOSE_FUNC(nvEncLib);
+    nvEncLib = 0;
+
+    /* Then the CUDA library and the device list gathered through it. */
+    DL_CLOSE_FUNC(cudaLib);
+    cudaLib = 0;
+    iNvencDeviceCount = 0;
+
+    /* Clear the cached entry points so no pointers into the closed library remain. */
+    cuCtxDestroy = 0;
+    cuCtxPopCurrent = 0;
+    cuCtxCreate = 0;
+    cuDeviceComputeCapability = 0;
+    cuDeviceGetName = 0;
+    cuDeviceGet = 0;
+    cuDeviceGetCount = 0;
+    cuInit = 0;
+
+    ifav_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n");
+}
+
+/* Parse a "{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}" string into *guid; returns 1 on success, 0 on malformed input (*guid is then left untouched). */
+av_cold int nvenc_string_to_guid(const char *str, GUID *guid)
+{
+    GUID tmp;
+    if(!guid || !str || strlen(str) < 38) /* the text can come straight from the environment, so fail softly instead of asserting */
+        return 0;
+    if(sscanf(str, "{%8X-%4hX-%4hX-%2hhX%2hhX-%2hhX%2hhX%2hhX%2hhX%2hhX%2hhX}",
+        &tmp.Data1,
+        &tmp.Data2,
+        &tmp.Data3,
+        &tmp.Data4[0],
+        &tmp.Data4[1],
+        &tmp.Data4[2],
+        &tmp.Data4[3],
+        &tmp.Data4[4],
+        &tmp.Data4[5],
+        &tmp.Data4[6],
+        &tmp.Data4[7]) != 11)
+        return 0;
+    *guid = tmp; /* commit only after all 11 fields converted */
+    return 1;
+}
+
+/* Format *guid as "{XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}" into str (capacity n bytes); returns 1 only if all 38 characters plus the NUL fit. */
+av_cold int nvenc_guid_to_string(const GUID *guid, char *str, int n)
+{
+    int res;
+    if(!guid || !str || n <= 0)
+        return 0;
+
+    res = snprintf(str, n, "{%08X-%04hX-%04hX-%02X%02X-%02X%02X%02X%02X%02X%02X}",
+        guid->Data1,
+        guid->Data2,
+        guid->Data3,
+        guid->Data4[0],
+        guid->Data4[1],
+        guid->Data4[2],
+        guid->Data4[3],
+        guid->Data4[4],
+        guid->Data4[5],
+        guid->Data4[6],
+        guid->Data4[7]);
+
+    return res == 38 && n > res; /* snprintf returns the untruncated length, so also require room for the NUL */
+}
+
diff --git a/libavcodec/nvenc_api.h b/libavcodec/nvenc_api.h
new file mode 100644
index 0000000..a61dff2
--- /dev/null
+++ b/libavcodec/nvenc_api.h
@@ -0,0 +1,38 @@
+/*
+ * H.264 hardware encoding using nvidia nvenc
+ * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_NVENC_API_H
+#define AVCODEC_NVENC_API_H
+
+#include <nvEncodeAPI.h>
+#include "avcodec.h" /* AVCodecContext is used in the prototypes below; keeps this header self-contained */
+
+typedef NVENCSTATUS (NVENCAPI* PNVENCODEAPICREATEINSTANCE)(NV_ENCODE_API_FUNCTION_LIST *functionList);
+
+extern NV_ENCODE_API_FUNCTION_LIST *pNvEnc; /* NULL until nvenc_dyload_nvenc() succeeds */
+
+int nvenc_dyload_nvenc(AVCodecContext *avctx); /* returns 1 on success, 0 on failure; avctx may be NULL to silence logging */
+void nvenc_unload_nvenc(AVCodecContext *avctx); /* releases one reference taken by nvenc_dyload_nvenc() */
+
+int nvenc_string_to_guid(const char *str, GUID *guid); /* returns 1 when the string parsed completely */
+int nvenc_guid_to_string(const GUID *guid, char *str, int n); /* returns 1 when snprintf reported the full 38-character length */
+
+#endif
diff --git a/libavcodec/nvenc_cuda.h b/libavcodec/nvenc_cuda.h
new file mode 100644
index 0000000..4e8ca20
--- /dev/null
+++ b/libavcodec/nvenc_cuda.h
@@ -0,0 +1,62 @@
+/*
+ * H.264 hardware encoding using nvidia nvenc
+ * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_NVENC_CUDA_H
+#define AVCODEC_NVENC_CUDA_H
+
+typedef enum cudaError_enum {
+    CUDA_SUCCESS = 0
+} CUresult;
+typedef int CUdevice;
+typedef void* CUcontext;
+
+#ifdef _WIN32
+#define CUDAAPI __stdcall
+#else
+#define CUDAAPI
+#endif
+
+typedef CUresult(CUDAAPI *PCUINIT)(unsigned int Flags);
+typedef CUresult(CUDAAPI *PCUDEVICEGETCOUNT)(int *count);
+typedef CUresult(CUDAAPI *PCUDEVICEGET)(CUdevice *device, int ordinal);
+typedef CUresult(CUDAAPI *PCUDEVICEGETNAME)(char *name, int len, CUdevice dev);
+typedef CUresult(CUDAAPI *PCUDEVICECOMPUTECAPABILITY)(int *major, int *minor, CUdevice dev);
+typedef CUresult(CUDAAPI *PCUCTXCREATE)(CUcontext *pctx, unsigned int flags, CUdevice dev);
+typedef CUresult(CUDAAPI *PCUCTXPOPCURRENT)(CUcontext *pctx);
+typedef CUresult(CUDAAPI *PCUCTXDESTROY)(CUcontext ctx);
+
+extern PCUINIT cuInit;
+extern PCUDEVICEGETCOUNT cuDeviceGetCount;
+extern PCUDEVICEGET cuDeviceGet;
+extern PCUDEVICEGETNAME cuDeviceGetName;
+extern PCUDEVICECOMPUTECAPABILITY cuDeviceComputeCapability;
+extern PCUCTXCREATE cuCtxCreate;
+extern PCUCTXPOPCURRENT cuCtxPopCurrent;
+extern PCUCTXDESTROY cuCtxDestroy;
+
+int nvenc_dyload_cuda(AVCodecContext *avctx);
+int nvenc_check_cuda(AVCodecContext *avctx);
+
+extern int iNvencDeviceCount;
+extern CUdevice pNvencDevices[16];
+extern unsigned int iNvencUseDeviceID;
+
+#endif

signature.asc
Description: OpenPGP digital signature

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH] Add NVENC encoder

Reply via email to