Please mark it as non-free, the header clearly says
that it must not be used for open-source software.

It only says not to distribute the header, the compiled binary and the code using it should be free to distribute.

Please use tools/patcheck on your patchfile: It shows
many issues most of which you should be able to fix
quickly, skip the ones that make no sense to you, the
tool is not error-free.
(Tabs and trailing whitespace cannot be committed.)

Fixed now, new patch is attached and on github.

From 4239af3cf66cdaa9ad99386bf728af4c1d1aca8a Mon Sep 17 00:00:00 2001
From: Timo Rothenpieler <t...@rothenpieler.org>
Date: Wed, 26 Nov 2014 11:08:11 +0100
Subject: [PATCH] Add NVENC encoder

---
 Changelog               |   1 +
 configure               |  11 +-
 libavcodec/Makefile     |   1 +
 libavcodec/allcodecs.c  |   1 +
 libavcodec/nvenc.c      | 932 ++++++++++++++++++++++++++++++++++++++++++++++++
 libavcodec/nvenc_api.c  | 275 ++++++++++++++
 libavcodec/nvenc_api.h  |  35 ++
 libavcodec/nvenc_cuda.h |  62 ++++
 8 files changed, 1316 insertions(+), 2 deletions(-)
 create mode 100644 libavcodec/nvenc.c
 create mode 100644 libavcodec/nvenc_api.c
 create mode 100644 libavcodec/nvenc_api.h
 create mode 100644 libavcodec/nvenc_cuda.h

diff --git a/Changelog b/Changelog
index 7172d0c..d26b7fa 100644
--- a/Changelog
+++ b/Changelog
@@ -17,6 +17,7 @@ version <next>:
 - WebP muxer with animated WebP support
 - zygoaudio decoding support
 - APNG demuxer
+- nvenc encoder
 
 
 version 2.4:
diff --git a/configure b/configure
index 38619c4..d0b790c 100755
--- a/configure
+++ b/configure
@@ -261,6 +261,7 @@ External library support:
   --enable-libzvbi         enable teletext support via libzvbi [no]
   --disable-lzma           disable lzma [autodetect]
   --enable-decklink        enable Blackmagick DeckLink I/O support [no]
+  --enable-nvenc           enable NVIDIA NVENC support [no]
   --enable-openal          enable OpenAL 1.1 capture support [no]
   --enable-opencl          enable OpenCL code
   --enable-opengl          enable OpenGL rendering [no]
@@ -1393,6 +1394,7 @@ EXTERNAL_LIBRARY_LIST="
     libzmq
     libzvbi
     lzma
+    nvenc
     openal
     opencl
     opengl
@@ -2389,6 +2391,7 @@ libxvid_encoder_deps="libxvid"
 libutvideo_decoder_deps="libutvideo"
 libutvideo_encoder_deps="libutvideo"
 libzvbi_teletext_decoder_deps="libzvbi"
+nvenc_encoder_deps="nvenc"
 
 # demuxers / muxers
 ac3_demuxer_select="ac3_parser"
@@ -2569,9 +2572,7 @@ drawtext_filter_deps="libfreetype"
 ebur128_filter_deps="gpl"
 flite_filter_deps="libflite"
 frei0r_filter_deps="frei0r dlopen"
-frei0r_filter_extralibs='$ldl'
 frei0r_src_filter_deps="frei0r dlopen"
-frei0r_src_filter_extralibs='$ldl'
 geq_filter_deps="gpl"
 histeq_filter_deps="gpl"
 hqdn3d_filter_deps="gpl"
@@ -4650,6 +4651,11 @@ elif check_func dlopen -ldl; then
     ldl=-ldl
 fi
 
+# set a few flags which depend on ldl and can't be set earlier
+nvenc_encoder_extralibs='$ldl'
+frei0r_filter_extralibs='$ldl'
+frei0r_src_filter_extralibs='$ldl'
+
 if ! disabled network; then
     check_func getaddrinfo $network_extralibs
     check_func getservbyport $network_extralibs
@@ -4913,6 +4919,7 @@ enabled libxavs           && require libxavs xavs.h xavs_encoder_encode -lxavs
 enabled libxvid           && require libxvid xvid.h xvid_global -lxvidcore
 enabled libzmq            && require_pkg_config libzmq zmq.h zmq_ctx_new
 enabled libzvbi           && require libzvbi libzvbi.h vbi_decoder_new -lzvbi
+enabled nvenc             && { check_header nvEncodeAPI.h || die "ERROR: nvEncodeAPI.h not found."; }
 enabled openal            && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32"; do
                                check_lib 'AL/al.h' alGetError "${al_libs}" && break; done } ||
                                die "ERROR: openal not found"; } &&
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index fa0f53d..cc41564 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -347,6 +347,7 @@ OBJS-$(CONFIG_MXPEG_DECODER)           += mxpegdec.o
 OBJS-$(CONFIG_NELLYMOSER_DECODER)      += nellymoserdec.o nellymoser.o
 OBJS-$(CONFIG_NELLYMOSER_ENCODER)      += nellymoserenc.o nellymoser.o
 OBJS-$(CONFIG_NUV_DECODER)             += nuv.o rtjpeg.o
+OBJS-$(CONFIG_NVENC_ENCODER)           += nvenc.o nvenc_api.o
 OBJS-$(CONFIG_ON2AVC_DECODER)          += on2avc.o on2avcdata.o
 OBJS-$(CONFIG_OPUS_DECODER)            += opusdec.o opus.o opus_celt.o \
                                           opus_imdct.o opus_silk.o     \
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 0d39d33..8ceee2f 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -223,6 +223,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER(MVC2,              mvc2);
     REGISTER_DECODER(MXPEG,             mxpeg);
     REGISTER_DECODER(NUV,               nuv);
+    REGISTER_ENCODER(NVENC,             nvenc);
     REGISTER_DECODER(PAF_VIDEO,         paf_video);
     REGISTER_ENCDEC (PAM,               pam);
     REGISTER_ENCDEC (PBM,               pbm);
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
new file mode 100644
index 0000000..3cb98d3
--- /dev/null
+++ b/libavcodec/nvenc.c
@@ -0,0 +1,932 @@
+/*
+ * H.264 hardware encoding using nvidia nvenc
+ * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+#include "libavutil/internal.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/avassert.h"
+#include "libavutil/opt.h"
+#include "libavutil/mem.h"
+#include "avcodec.h"
+#include "internal.h"
+
+#include "nvenc_cuda.h"
+#include "nvenc_api.h"
+
+typedef struct NvencInputSurface
+{
+    NV_ENC_INPUT_PTR inputSurface;
+    int width;
+    int height;
+
+    int lockCount;
+
+    NV_ENC_BUFFER_FORMAT format;
+} NvencInputSurface;
+
+typedef struct NvencOutputSurface
+{
+    NV_ENC_OUTPUT_PTR outputSurface;
+    int size;
+
+    NvencInputSurface *inputSurface;
+
+    int busy;
+} NvencOutputSurface;
+
+typedef struct NvencOutputSurfaceList
+{
+    NvencOutputSurface *surface;
+    struct NvencOutputSurfaceList *next;
+} NvencOutputSurfaceList;
+
+typedef struct NvencTimestampList
+{
+    int64_t timestamp;
+    struct NvencTimestampList *next;
+} NvencTimestampList;
+
+typedef struct NvencContext
+{
+    AVClass *avclass;
+
+    NV_ENC_INITIALIZE_PARAMS initEncodeParams;
+    NV_ENC_CONFIG encodeConfig;
+    CUcontext cuContext;
+
+    int maxSurfaceCount;
+    NvencInputSurface *inputSurfaces;
+    NvencOutputSurface *outputSurfaces;
+
+    NvencOutputSurfaceList *outputSurfaceQueue;
+    NvencOutputSurfaceList *outputSurfaceReadyQueue;
+    NvencTimestampList *timestampList;
+    int64_t lastDts;
+
+    void *nvencoder;
+
+    char *profile;
+    char *preset;
+    int cqp;
+    int cbr;
+    int twopass;
+    int gobpattern;
+} NvencContext;
+
+static const GUID dummy_license = { 0x0, 0x0, 0x0, { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 } };
+
+static void out_surf_queue_push(NvencOutputSurfaceList** head, NvencOutputSurface *surface)
+{
+    if (!*head) {
+        *head = av_malloc(sizeof(NvencOutputSurfaceList));
+        (*head)->next = 0;
+        (*head)->surface = surface;
+        return;
+    }
+
+    while ((*head)->next)
+        head = &((*head)->next);
+
+    (*head)->next = av_malloc(sizeof(NvencOutputSurfaceList));
+    (*head)->next->next = 0;
+    (*head)->next->surface = surface;
+}
+
+static NvencOutputSurface *out_surf_queue_pop(NvencOutputSurfaceList** head)
+{
+    NvencOutputSurfaceList *tmp;
+    NvencOutputSurface *res;
+
+    if (!*head)
+        return 0;
+
+    tmp = *head;
+    res = tmp->surface;
+    *head = tmp->next;
+    av_free(tmp);
+
+    return res;
+}
+
+static void timestamp_list_insert_sorted(NvencTimestampList** head, int64_t timestamp)
+{
+    NvencTimestampList *newelem;
+    NvencTimestampList *prev;
+
+    if (!*head) {
+        *head = av_malloc(sizeof(NvencTimestampList));
+        (*head)->next = 0;
+        (*head)->timestamp = timestamp;
+        return;
+    }
+
+    prev = 0;
+    while (*head && timestamp >= (*head)->timestamp) {
+        prev = *head;
+        head = &((*head)->next);
+    }
+
+    newelem = av_malloc(sizeof(NvencTimestampList));
+    newelem->next = *head;
+    newelem->timestamp = timestamp;
+
+    if (*head) {
+        *head = newelem;
+    } else {
+        prev->next = newelem;
+    }
+}
+
+static int64_t timestamp_list_get_lowest(NvencTimestampList** head)
+{
+    NvencTimestampList *tmp;
+    int64_t res;
+
+    if (!*head)
+        return 0;
+
+    tmp = *head;
+    res = tmp->timestamp;
+    *head = tmp->next;
+    av_free(tmp);
+
+    return res;
+}
+
+static av_cold int nvenc_encode_init(AVCodecContext *avctx)
+{
+    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS stEncodeSessionParams = { 0 };
+    NV_ENC_PRESET_CONFIG presetConfig = { 0 };
+    CUcontext cuContextCurr;
+    GUID encoderPreset = NV_ENC_PRESET_HQ_GUID;
+    GUID license = dummy_license;
+    NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
+    int surfaceCount = 0;
+    int i, numMBs;
+    int isLL = 0;
+
+    NvencContext *ctx = avctx->priv_data;
+
+    if (!ff_nvenc_dyload_nvenc(avctx))
+        return AVERROR_EXTERNAL;
+
+    avctx->coded_frame = av_frame_alloc();
+    if (!avctx->coded_frame)
+        return AVERROR(ENOMEM);
+
+    memset(&ctx->initEncodeParams, 0, sizeof(NV_ENC_INITIALIZE_PARAMS));
+    memset(&ctx->encodeConfig, 0, sizeof(NV_ENC_CONFIG));
+
+    ctx->outputSurfaceQueue = 0;
+    ctx->outputSurfaceReadyQueue = 0;
+    ctx->timestampList = 0;
+    ctx->lastDts = AV_NOPTS_VALUE;
+    ctx->nvencoder = 0;
+
+    ctx->encodeConfig.version = NV_ENC_CONFIG_VER;
+    ctx->initEncodeParams.version = NV_ENC_INITIALIZE_PARAMS_VER;
+    presetConfig.version = NV_ENC_PRESET_CONFIG_VER;
+    presetConfig.presetCfg.version = NV_ENC_CONFIG_VER;
+    stEncodeSessionParams.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
+    stEncodeSessionParams.apiVersion = NVENCAPI_VERSION;
+    stEncodeSessionParams.clientKeyPtr = &license;
+
+    ctx->cuContext = 0;
+    if (ff_cuCtxCreate(&ctx->cuContext, 0, ff_pNvencDevices[ff_iNvencUseDeviceID]) != CUDA_SUCCESS
+            || ff_cuCtxPopCurrent(&cuContextCurr) != CUDA_SUCCESS) {
+        av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC\n");
+        goto error;
+    }
+
+    stEncodeSessionParams.device = (void*)ctx->cuContext;
+    stEncodeSessionParams.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
+
+    nvStatus = ff_pNvEnc->nvEncOpenEncodeSessionEx(&stEncodeSessionParams, &ctx->nvencoder);
+    if (nvStatus != NV_ENC_SUCCESS) {
+        ctx->nvencoder = 0;
+        av_log(avctx, AV_LOG_FATAL, "OpenEncodeSessionEx failed: 0x%x - invalid license key?\n", (int)nvStatus);
+        goto error;
+    }
+
+    if (ctx->preset) {
+        if (!strcmp(ctx->preset, "hp")) {
+            encoderPreset = NV_ENC_PRESET_HP_GUID;
+        } else if (!strcmp(ctx->preset, "hq")) {
+            encoderPreset = NV_ENC_PRESET_HQ_GUID;
+        } else if (!strcmp(ctx->preset, "bd")) {
+            encoderPreset = NV_ENC_PRESET_BD_GUID;
+        } else if (!strcmp(ctx->preset, "ll")) {
+            encoderPreset = NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID;
+            isLL = 1;
+        } else if (!strcmp(ctx->preset, "llhp")) {
+            encoderPreset = NV_ENC_PRESET_LOW_LATENCY_HP_GUID;
+            isLL = 1;
+        } else if (!strcmp(ctx->preset, "llhq")) {
+            encoderPreset = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;
+            isLL = 1;
+        } else if (!strcmp(ctx->preset, "default")) {
+            encoderPreset = NV_ENC_PRESET_DEFAULT_GUID;
+        } else {
+            av_log(avctx, AV_LOG_ERROR, "Preset \"%s\" is unknown!\n", ctx->preset);
+        }
+    }
+
+    nvStatus = ff_pNvEnc->nvEncGetEncodePresetConfig(ctx->nvencoder, NV_ENC_CODEC_H264_GUID, encoderPreset, &presetConfig);
+    if (nvStatus != NV_ENC_SUCCESS) {
+        av_log(avctx, AV_LOG_FATAL, "GetEncodePresetConfig failed: 0x%x\n", (int)nvStatus);
+        goto error;
+    }
+
+    ctx->initEncodeParams.encodeGUID = NV_ENC_CODEC_H264_GUID;
+    ctx->initEncodeParams.encodeHeight = avctx->height;
+    ctx->initEncodeParams.encodeWidth = avctx->width;
+    ctx->initEncodeParams.darHeight = avctx->height;
+    ctx->initEncodeParams.darWidth = avctx->width;
+    ctx->initEncodeParams.frameRateNum = avctx->time_base.den;
+    ctx->initEncodeParams.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;
+
+    numMBs = ((avctx->width + 15) >> 4) * ((avctx->height + 15) >> 4);
+    ctx->maxSurfaceCount = (numMBs >= 8160) ? 16 : 32;
+
+    ctx->initEncodeParams.enableEncodeAsync = 0;
+    ctx->initEncodeParams.enablePTD = 1;
+
+    ctx->initEncodeParams.presetGUID = encoderPreset;
+
+    ctx->initEncodeParams.encodeConfig = &ctx->encodeConfig;
+    memcpy(&ctx->encodeConfig, &presetConfig.presetCfg, sizeof(NV_ENC_CONFIG));
+    ctx->encodeConfig.version = NV_ENC_CONFIG_VER;
+
+    if (avctx->gop_size >= 0) {
+        ctx->encodeConfig.gopLength = avctx->gop_size;
+        ctx->encodeConfig.encodeCodecConfig.h264Config.idrPeriod = avctx->gop_size;
+    }
+
+    if (avctx->bit_rate > 0)
+        ctx->encodeConfig.rcParams.averageBitRate = avctx->bit_rate;
+
+    if (avctx->rc_max_rate > 0)
+        ctx->encodeConfig.rcParams.maxBitRate = avctx->rc_max_rate;
+
+    if (ctx->cbr) {
+        if (!ctx->twopass) {
+            ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
+        } else if (ctx->twopass == 1 || isLL) {
+            ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_QUALITY;
+
+            ctx->encodeConfig.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
+            ctx->encodeConfig.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE;
+
+            if (!isLL)
+                av_log(avctx, AV_LOG_WARNING, "Twopass mode is only known to work with low latency (ll, llhq, llhp) presets.\n");
+        } else {
+            ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
+        }
+    } else if (ctx->cqp >= 0) {
+        ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
+        ctx->encodeConfig.rcParams.constQP.qpInterB = ctx->cqp;
+        ctx->encodeConfig.rcParams.constQP.qpInterP = ctx->cqp;
+        ctx->encodeConfig.rcParams.constQP.qpIntra = ctx->cqp;
+
+        avctx->qmin = -1;
+        avctx->qmax = -1;
+    } else if (avctx->qmin >= 0 && avctx->qmax >= 0) {
+        ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
+
+        ctx->encodeConfig.rcParams.enableMinQP = 1;
+        ctx->encodeConfig.rcParams.enableMaxQP = 1;
+
+        ctx->encodeConfig.rcParams.minQP.qpInterB = avctx->qmin;
+        ctx->encodeConfig.rcParams.minQP.qpInterP = avctx->qmin;
+        ctx->encodeConfig.rcParams.minQP.qpIntra = avctx->qmin;
+
+        ctx->encodeConfig.rcParams.maxQP.qpInterB = avctx->qmax;
+        ctx->encodeConfig.rcParams.maxQP.qpInterP = avctx->qmax;
+        ctx->encodeConfig.rcParams.maxQP.qpIntra = avctx->qmax;
+    }
+
+    if (avctx->rc_buffer_size > 0)
+        ctx->encodeConfig.rcParams.vbvBufferSize = avctx->rc_buffer_size;
+
+    if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
+        ctx->encodeConfig.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
+    } else {
+        ctx->encodeConfig.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
+    }
+
+    if (!ctx->profile) {
+        switch (avctx->profile) {
+            case FF_PROFILE_H264_BASELINE:
+            ctx->profile = av_strdup("baseline");
+            break;
+            case FF_PROFILE_H264_MAIN:
+            ctx->profile = av_strdup("main");
+            break;
+            default:
+            ctx->profile = av_strdup("high");
+            break;
+        }
+    }
+
+    ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
+
+    if (!strcmp(ctx->profile, "high")) {
+        ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
+    } else if (!strcmp(ctx->profile, "main")) {
+        ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
+    } else if (!strcmp(ctx->profile, "baseline")) {
+        ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
+    } else {
+        av_log(avctx, AV_LOG_WARNING, "Unknown profile requested: %s\n", ctx->profile);
+    }
+
+    if (ctx->gobpattern >= 0) {
+        ctx->encodeConfig.frameIntervalP = 1;
+    }
+
+    ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag = 1;
+    ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag = 1;
+
+    ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.colourMatrix = avctx->colorspace;
+    ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.colourPrimaries = avctx->color_primaries;
+    ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics = avctx->color_trc;
+
+    ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag = avctx->color_range == AVCOL_RANGE_JPEG;
+
+    ctx->encodeConfig.encodeCodecConfig.h264Config.disableSPSPPS = (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
+
+    nvStatus = ff_pNvEnc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->initEncodeParams);
+    if (nvStatus != NV_ENC_SUCCESS) {
+        av_log(avctx, AV_LOG_FATAL, "InitializeEncoder failed: 0x%x\n", (int)nvStatus);
+        goto error;
+    }
+
+    ctx->inputSurfaces = (NvencInputSurface*)calloc(ctx->maxSurfaceCount, sizeof(NvencInputSurface));
+    ctx->outputSurfaces = (NvencOutputSurface*)calloc(ctx->maxSurfaceCount, sizeof(NvencOutputSurface));
+
+    for (surfaceCount = 0; surfaceCount < ctx->maxSurfaceCount; ++surfaceCount) {
+        NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 };
+        NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
+        allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
+        allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
+
+        allocSurf.width = (avctx->width + 31) & ~31;
+        allocSurf.height = (avctx->height + 31) & ~31;
+
+        allocSurf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
+
+        switch (avctx->pix_fmt) {
+            case AV_PIX_FMT_YUV420P:
+            allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YV12_PL;
+            break;
+
+            case AV_PIX_FMT_NV12:
+            allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_NV12_PL;
+            break;
+
+            case AV_PIX_FMT_YUV444P:
+            allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YUV444_PL;
+            break;
+
+            default:
+            av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format\n");
+            goto error;
+        }
+
+        nvStatus = ff_pNvEnc->nvEncCreateInputBuffer(ctx->nvencoder, &allocSurf);
+        if (nvStatus = NV_ENC_SUCCESS){
+            av_log(avctx, AV_LOG_FATAL, "CreateInputBuffer failed\n");
+            goto error;
+        }
+
+        ctx->inputSurfaces[surfaceCount].lockCount = 0;
+        ctx->inputSurfaces[surfaceCount].inputSurface = allocSurf.inputBuffer;
+        ctx->inputSurfaces[surfaceCount].format = allocSurf.bufferFmt;
+        ctx->inputSurfaces[surfaceCount].width = allocSurf.width;
+        ctx->inputSurfaces[surfaceCount].height = allocSurf.height;
+
+        allocOut.size = 1024 * 1024;
+        allocOut.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
+
+        nvStatus = ff_pNvEnc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut);
+        if (nvStatus = NV_ENC_SUCCESS) {
+            av_log(avctx, AV_LOG_FATAL, "CreateBitstreamBuffer failed\n");
+            ctx->outputSurfaces[surfaceCount++].outputSurface = 0;
+            goto error;
+        }
+
+        ctx->outputSurfaces[surfaceCount].outputSurface = allocOut.bitstreamBuffer;
+        ctx->outputSurfaces[surfaceCount].size = allocOut.size;
+        ctx->outputSurfaces[surfaceCount].busy = 0;
+    }
+
+    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
+        uint32_t outSize = 0;
+        char tmpHeader[256];
+        NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
+        payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
+
+        payload.spsppsBuffer = tmpHeader;
+        payload.inBufferSize = 256;
+        payload.outSPSPPSPayloadSize = &outSize;
+
+        nvStatus = ff_pNvEnc->nvEncGetSequenceParams(ctx->nvencoder, &payload);
+        if (nvStatus != NV_ENC_SUCCESS) {
+            av_log(avctx, AV_LOG_FATAL, "GetSequenceParams failed\n");
+            goto error;
+        }
+
+        avctx->extradata_size = outSize;
+        avctx->extradata = av_mallocz(outSize + FF_INPUT_BUFFER_PADDING_SIZE);
+
+        memcpy(avctx->extradata, tmpHeader, outSize);
+    } else {
+        avctx->extradata = 0;
+        avctx->extradata_size = 0;
+    }
+
+    if (ctx->encodeConfig.frameIntervalP > 1)
+        avctx->has_b_frames = 2;
+
+    if (ctx->encodeConfig.rcParams.averageBitRate > 0)
+        avctx->bit_rate = ctx->encodeConfig.rcParams.averageBitRate;
+
+    return 0;
+
+error:
+
+    for (i = 0; i < surfaceCount; ++i) {
+        ff_pNvEnc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->inputSurfaces[i].inputSurface);
+        if (ctx->outputSurfaces[i].outputSurface)
+            ff_pNvEnc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->outputSurfaces[i].outputSurface);
+    }
+
+    if (ctx->nvencoder)
+        ff_pNvEnc->nvEncDestroyEncoder(ctx->nvencoder);
+
+    if (ctx->cuContext)
+        ff_cuCtxDestroy(ctx->cuContext);
+
+    ff_nvenc_unload_nvenc(avctx);
+
+    ctx->nvencoder = 0;
+    ctx->cuContext = 0;
+
+    return AVERROR_EXTERNAL;
+}
+
+static av_cold int nvenc_encode_close(AVCodecContext *avctx)
+{
+    NvencContext *ctx = avctx->priv_data;
+    int i;
+
+    if (ctx->profile)
+        av_freep(&ctx->profile);
+
+    if (avctx->extradata)
+        av_freep(&avctx->extradata);
+
+    while (ctx->timestampList)
+        timestamp_list_get_lowest(&ctx->timestampList);
+
+    while (ctx->outputSurfaceReadyQueue)
+        out_surf_queue_pop(&ctx->outputSurfaceReadyQueue);
+
+    while (ctx->outputSurfaceQueue)
+        out_surf_queue_pop(&ctx->outputSurfaceQueue);
+
+    for (i = 0; i < ctx->maxSurfaceCount; ++i) {
+        ff_pNvEnc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->inputSurfaces[i].inputSurface);
+        ff_pNvEnc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->outputSurfaces[i].outputSurface);
+    }
+    ctx->maxSurfaceCount = 0;
+
+    ff_pNvEnc->nvEncDestroyEncoder(ctx->nvencoder);
+    ctx->nvencoder = 0;
+
+    ff_cuCtxDestroy(ctx->cuContext);
+    ctx->cuContext = 0;
+
+    ff_nvenc_unload_nvenc(avctx);
+
+    av_frame_free(&avctx->coded_frame);
+
+    return 0;
+}
+
+static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, AVFrame *coded_frame, NvencOutputSurface *tmpoutsurf)
+{
+    NvencContext *ctx = avctx->priv_data;
+    uint32_t *sliceOffsets = (uint32_t*)calloc(ctx->encodeConfig.encodeCodecConfig.h264Config.sliceModeData, sizeof(uint32_t));
+    NV_ENC_LOCK_BITSTREAM lockParams = { 0 };
+    NVENCSTATUS nvStatus;
+
+    lockParams.version = NV_ENC_LOCK_BITSTREAM_VER;
+
+    lockParams.doNotWait = 0;
+    lockParams.outputBitstream = tmpoutsurf->outputSurface;
+    lockParams.sliceOffsets = sliceOffsets;
+
+    nvStatus = ff_pNvEnc->nvEncLockBitstream(ctx->nvencoder, &lockParams);
+    if (nvStatus != NV_ENC_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed locking bitstream buffer\n");
+        timestamp_list_get_lowest(&ctx->timestampList);
+        return 0;
+    }
+
+    if (ff_alloc_packet2(avctx, pkt, lockParams.bitstreamSizeInBytes) < 0) {
+        ff_pNvEnc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->outputSurface);
+        timestamp_list_get_lowest(&ctx->timestampList);
+        return 0;
+    }
+
+    memcpy(pkt->data, lockParams.bitstreamBufferPtr, lockParams.bitstreamSizeInBytes);
+
+    nvStatus = ff_pNvEnc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->outputSurface);
+    if (nvStatus != NV_ENC_SUCCESS)
+        av_log(avctx, AV_LOG_ERROR, "Failed unlocking bitstream buffer, expect the gates of mordor to open\n");
+
+    switch (lockParams.pictureType) {
+        case NV_ENC_PIC_TYPE_IDR:
+        pkt->flags |= AV_PKT_FLAG_KEY;
+        case NV_ENC_PIC_TYPE_I:
+        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
+        break;
+
+        case NV_ENC_PIC_TYPE_P:
+        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_P;
+        break;
+
+        case NV_ENC_PIC_TYPE_B:
+        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_B;
+        break;
+
+        case NV_ENC_PIC_TYPE_BI:
+        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_BI;
+        break;
+
+        default:
+        avctx->coded_frame->pict_type = AV_PICTURE_TYPE_NONE;
+        break;
+    }
+
+    pkt->pts = lockParams.outputTimeStamp;
+    pkt->dts = timestamp_list_get_lowest(&ctx->timestampList) - ctx->encodeConfig.frameIntervalP;
+
+    if (pkt->dts > pkt->pts)
+        pkt->dts = pkt->pts;
+
+    if (ctx->lastDts != AV_NOPTS_VALUE && pkt->dts <= ctx->lastDts)
+        pkt->dts = ctx->lastDts + 1;
+
+    ctx->lastDts = pkt->dts;
+
+    return 1;
+}
+
+static int nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+    const AVFrame *frame, int *got_packet)
+{
+    NVENCSTATUS nvStatus;
+    NvencContext *ctx = avctx->priv_data;
+    NvencOutputSurface *tmpoutsurf;
+    int i = 0;
+
+    NV_ENC_PIC_PARAMS picParams = { 0 };
+    picParams.version = NV_ENC_PIC_PARAMS_VER;
+
+    if (frame) {
+        NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 };
+        NvencInputSurface *inSurf = 0;
+
+        for (i = 0; i < ctx->maxSurfaceCount; ++i)
+            if (!ctx->inputSurfaces[i].lockCount)
+                inSurf = &ctx->inputSurfaces[i];
+        av_assert0(inSurf);
+
+        inSurf->lockCount = 1;
+
+        lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
+        lockBufferParams.inputBuffer = inSurf->inputSurface;
+
+        nvStatus = ff_pNvEnc->nvEncLockInputBuffer(ctx->nvencoder, &lockBufferParams);
+        if (nvStatus != NV_ENC_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed locking nvenc input buffer\n");
+            return 0;
+        }
+
+        if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) {
+            uint8_t *buf = lockBufferParams.bufferDataPtr;
+
+            av_image_copy_plane(buf, lockBufferParams.pitch,
+                frame->data[0], frame->linesize[0],
+                avctx->width, avctx->height);
+
+            buf += inSurf->height * lockBufferParams.pitch;
+
+            av_image_copy_plane(buf, lockBufferParams.pitch >> 1,
+                frame->data[2], frame->linesize[2],
+                avctx->width >> 1, avctx->height >> 1);
+
+            buf += (inSurf->height * lockBufferParams.pitch) >> 2;
+
+            av_image_copy_plane(buf, lockBufferParams.pitch >> 1,
+                frame->data[1], frame->linesize[1],
+                avctx->width >> 1, avctx->height >> 1);
+        } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) {
+            uint8_t *buf = lockBufferParams.bufferDataPtr;
+
+            av_image_copy_plane(buf, lockBufferParams.pitch,
+                frame->data[0], frame->linesize[0],
+                avctx->width, avctx->height);
+
+            buf += inSurf->height * lockBufferParams.pitch;
+
+            av_image_copy_plane(buf, lockBufferParams.pitch,
+                frame->data[1], frame->linesize[1],
+                avctx->width, avctx->height >> 1);
+        } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P) {
+            uint8_t *buf = lockBufferParams.bufferDataPtr;
+
+            av_image_copy_plane(buf, lockBufferParams.pitch,
+                frame->data[0], frame->linesize[0],
+                avctx->width, avctx->height);
+
+            buf += inSurf->height * lockBufferParams.pitch;
+
+            av_image_copy_plane(buf, lockBufferParams.pitch,
+                frame->data[1], frame->linesize[1],
+                avctx->width, avctx->height);
+
+            buf += inSurf->height * lockBufferParams.pitch;
+
+            av_image_copy_plane(buf, lockBufferParams.pitch,
+                frame->data[2], frame->linesize[2],
+                avctx->width, avctx->height);
+        } else {
+            av_log(avctx, AV_LOG_FATAL, "Invalid pixel format!\n");
+            return AVERROR(EINVAL);
+        }
+
+        nvStatus = ff_pNvEnc->nvEncUnlockInputBuffer(ctx->nvencoder, inSurf->inputSurface);
+        if (nvStatus != NV_ENC_SUCCESS) {
+            av_log(avctx, AV_LOG_FATAL, "Failed unlocking input buffer!\n");
+            return AVERROR_EXTERNAL;
+        }
+
+        for (i = 0; i < ctx->maxSurfaceCount; ++i)
+            if (!ctx->outputSurfaces[i].busy)
+                break;
+
+        if (i == ctx->maxSurfaceCount) {
+            inSurf->lockCount = 0;
+            av_log(avctx, AV_LOG_ERROR, "No free output surface found!\n");
+            return 0;
+        }
+
+        ctx->outputSurfaces[i].inputSurface = inSurf;
+
+        picParams.inputBuffer = inSurf->inputSurface;
+        picParams.bufferFmt = inSurf->format;
+        picParams.inputWidth = avctx->width;
+        picParams.inputHeight = avctx->height;
+        picParams.outputBitstream = ctx->outputSurfaces[i].outputSurface;
+        picParams.completionEvent = 0;
+
+        if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
+            if (frame->top_field_first) {
+                picParams.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
+            } else {
+                picParams.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
+            }
+        } else {
+            picParams.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
+        }
+
+        picParams.encodePicFlags = 0;
+        picParams.inputTimeStamp = frame->pts;
+        picParams.inputDuration = 0;
+        picParams.codecPicParams.h264PicParams.sliceMode = ctx->encodeConfig.encodeCodecConfig.h264Config.sliceMode;
+        picParams.codecPicParams.h264PicParams.sliceModeData = ctx->encodeConfig.encodeCodecConfig.h264Config.sliceModeData;
+        memcpy(&picParams.rcParams, &ctx->encodeConfig.rcParams, sizeof(NV_ENC_RC_PARAMS));
+
+        timestamp_list_insert_sorted(&ctx->timestampList, frame->pts);
+    } else {
+        picParams.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
+    }
+
+    nvStatus = ff_pNvEnc->nvEncEncodePicture(ctx->nvencoder, &picParams);
+
+    if (frame && nvStatus == NV_ENC_ERR_NEED_MORE_INPUT) {
+        out_surf_queue_push(&ctx->outputSurfaceQueue, &ctx->outputSurfaces[i]);
+        ctx->outputSurfaces[i].busy = 1;
+    }
+
+    if (nvStatus != NV_ENC_SUCCESS && nvStatus != NV_ENC_ERR_NEED_MORE_INPUT) {
+        av_log(avctx, AV_LOG_ERROR, "EncodePicture failed!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    if (nvStatus != NV_ENC_ERR_NEED_MORE_INPUT) {
+        while (ctx->outputSurfaceQueue) {
+            tmpoutsurf = out_surf_queue_pop(&ctx->outputSurfaceQueue);
+            out_surf_queue_push(&ctx->outputSurfaceReadyQueue, tmpoutsurf);
+        }
+
+        if (frame) {
+            out_surf_queue_push(&ctx->outputSurfaceReadyQueue, &ctx->outputSurfaces[i]);
+            ctx->outputSurfaces[i].busy = 1;
+        }
+    }
+
+    if (ctx->outputSurfaceReadyQueue) {
+        tmpoutsurf = out_surf_queue_pop(&ctx->outputSurfaceReadyQueue);
+
+        *got_packet = process_output_surface(avctx, pkt, avctx->coded_frame, tmpoutsurf);
+
+        tmpoutsurf->busy = 0;
+        av_assert0(tmpoutsurf->inputSurface->lockCount);
+        tmpoutsurf->inputSurface->lockCount--;
+    }
+
+    return 0;
+}
+
+static int pix_fmts_nvenc_initialized;
+
+static enum AVPixelFormat pix_fmts_nvenc[] = {
+    AV_PIX_FMT_NV12,
+    AV_PIX_FMT_NONE,
+    AV_PIX_FMT_NONE,
+    AV_PIX_FMT_NONE
+};
+
+static av_cold void nvenc_init_static(AVCodec *codec)
+{
+    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS stEncodeSessionParams = { 0 };
+    CUcontext cuctxcur = 0, cuctx = 0;
+    NVENCSTATUS nvStatus;
+    void *nvencoder = 0;
+    GUID encodeGuid = NV_ENC_CODEC_H264_GUID;
+    GUID license = dummy_license;
+    int i = 0, pos = 0;
+    int gotnv12 = 0, got420 = 0, got444 = 0;
+    uint32_t inputFmtCount = 32;
+    NV_ENC_BUFFER_FORMAT inputFmts[32];
+
+    for (i = 0; i < 32; ++i)
+        inputFmts[i] = (NV_ENC_BUFFER_FORMAT)0;
+    i = 0;
+
+    if (pix_fmts_nvenc_initialized) {
+        codec->pix_fmts = pix_fmts_nvenc;
+        return;
+    }
+
+    if (!ff_nvenc_dyload_nvenc(0)) {
+        pix_fmts_nvenc_initialized = 1;
+        return;
+    }
+
+    stEncodeSessionParams.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
+    stEncodeSessionParams.apiVersion = NVENCAPI_VERSION;
+    stEncodeSessionParams.clientKeyPtr = &license;
+
+    cuctx = 0;
+    if (ff_cuCtxCreate(&cuctx, 0, ff_pNvencDevices[ff_iNvencUseDeviceID]) != CUDA_SUCCESS) {
+        cuctx = 0;
+        goto error;
+    }
+
+    if (ff_cuCtxPopCurrent(&cuctxcur) != CUDA_SUCCESS)
+        goto error;
+
+    stEncodeSessionParams.device = (void*)cuctx;
+    stEncodeSessionParams.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
+
+    nvStatus = ff_pNvEnc->nvEncOpenEncodeSessionEx(&stEncodeSessionParams, &nvencoder);
+    if (nvStatus != NV_ENC_SUCCESS) {
+        nvencoder = 0;
+        goto error;
+    }
+
+    nvStatus = ff_pNvEnc->nvEncGetInputFormats(nvencoder, encodeGuid, inputFmts, 32, &inputFmtCount);
+    if (nvStatus != NV_ENC_SUCCESS)
+        goto error;
+
+    pos = 0;
+    for (i = 0; i < inputFmtCount && pos < 3; ++i) {
+        if (!gotnv12 && (inputFmts[i] == NV_ENC_BUFFER_FORMAT_NV12_PL
+                || inputFmts[i] == NV_ENC_BUFFER_FORMAT_NV12_TILED16x16
+                || inputFmts[i] == NV_ENC_BUFFER_FORMAT_NV12_TILED64x16)) {
+
+            pix_fmts_nvenc[pos++] = AV_PIX_FMT_NV12;
+            gotnv12 = 1;
+        } else if (!got420 && (inputFmts[i] == NV_ENC_BUFFER_FORMAT_YV12_PL
+                || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YV12_TILED16x16
+                || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YV12_TILED64x16)) {
+
+            pix_fmts_nvenc[pos++] = AV_PIX_FMT_YUV420P;
+            got420 = 1;
+        } else if (!got444 && (inputFmts[i] == NV_ENC_BUFFER_FORMAT_YUV444_PL
+                || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YUV444_TILED16x16
+                || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YUV444_TILED64x16)) {
+
+            pix_fmts_nvenc[pos++] = AV_PIX_FMT_YUV444P;
+            got444 = 1;
+        }
+    }
+
+    pix_fmts_nvenc[pos] = AV_PIX_FMT_NONE;
+
+    pix_fmts_nvenc_initialized = 1;
+    codec->pix_fmts = pix_fmts_nvenc;
+
+    ff_pNvEnc->nvEncDestroyEncoder(nvencoder);
+    ff_cuCtxDestroy(cuctx);
+
+    ff_nvenc_unload_nvenc(0);
+
+    return;
+
+error:
+
+    if (nvencoder)
+        ff_pNvEnc->nvEncDestroyEncoder(nvencoder);
+
+    if (cuctx)
+        ff_cuCtxDestroy(cuctx);
+
+    pix_fmts_nvenc_initialized = 1;
+    pix_fmts_nvenc[0] = AV_PIX_FMT_NV12;
+    pix_fmts_nvenc[1] = AV_PIX_FMT_NONE;
+
+    codec->pix_fmts = pix_fmts_nvenc;
+
+    ff_nvenc_unload_nvenc(0);
+}
+
+#define OFFSET(x) offsetof(NvencContext, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+static const AVOption options[] = {
+    { "profile", "Set profile restrictions", OFFSET(profile), AV_OPT_TYPE_STRING, { .str = "high" }, 0, 0, VE},
+    { "preset", "Set the encoding preset (one of hq, hp, bd, ll, llhq, llhp, default)", OFFSET(preset), AV_OPT_TYPE_STRING, { .str = "hq" }, 0, 0, VE },
+    { "cqp", "Constant quantization parameter rate control method", OFFSET(cqp), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE },
+    { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
+    { "2pass", "Use 2pass cbr encoding mode (low latency mode only)", OFFSET(twopass), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE },
+    { "goppattern", "Specifies the GOP pattern as follows: 0: I, 1: IPP, 2: IBP, 3: IBBP", OFFSET(gobpattern), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 3, VE },
+    { NULL }
+};
+
+static const AVClass nvenc_class = {
+    .class_name = "nvenc",
+    .item_name = av_default_item_name,
+    .option = options,
+    .version = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVCodecDefault nvenc_defaults[] = {
+    { "b", "0" },
+    { "qmin", "-1" },
+    { "qmax", "-1" },
+    { "qdiff", "-1" },
+    { "qblur", "-1" },
+    { "qcomp", "-1" },
+    { NULL },
+};
+
+AVCodec ff_nvenc_encoder = {
+    .name = "nvenc",
+    .long_name = NULL_IF_CONFIG_SMALL("Nvidia NVENC h264 encoder"),
+    .type = AVMEDIA_TYPE_VIDEO,
+    .id = AV_CODEC_ID_H264,
+    .priv_data_size = sizeof(NvencContext),
+    .init = nvenc_encode_init,
+    .encode2 = nvenc_encode_frame,
+    .close = nvenc_encode_close,
+    .capabilities = CODEC_CAP_DELAY,
+    .priv_class = &nvenc_class,
+    .defaults = nvenc_defaults,
+    .init_static_data = nvenc_init_static
+};
diff --git a/libavcodec/nvenc_api.c b/libavcodec/nvenc_api.c
new file mode 100644
index 0000000..53d5fa8
--- /dev/null
+++ b/libavcodec/nvenc_api.c
@@ -0,0 +1,275 @@
+/*
+ * H.264 hardware encoding using nvidia nvenc
+ * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <dlfcn.h>
+#endif
+
+#include "libavutil/avassert.h"
+#include "avcodec.h"
+#include "internal.h"
+
+#include "nvenc_cuda.h"
+#include "nvenc_api.h"
+
+PCUINIT ff_cuInit = 0;
+PCUDEVICEGETCOUNT ff_cuDeviceGetCount = 0;
+PCUDEVICEGET ff_cuDeviceGet = 0;
+PCUDEVICEGETNAME ff_cuDeviceGetName = 0;
+PCUDEVICECOMPUTECAPABILITY ff_cuDeviceComputeCapability = 0;
+PCUCTXCREATE ff_cuCtxCreate = 0;
+PCUCTXPOPCURRENT ff_cuCtxPopCurrent = 0;
+PCUCTXDESTROY ff_cuCtxDestroy = 0;
+
+static int nvenc_init_count;
+static NV_ENCODE_API_FUNCTION_LIST nvEncFuncs;
+NV_ENCODE_API_FUNCTION_LIST *ff_pNvEnc = 0;
+int ff_iNvencDeviceCount = 0;
+CUdevice ff_pNvencDevices[16];
+unsigned int ff_iNvencUseDeviceID = 0;
+
+#ifdef _WIN32
+#define LOAD_FUNC(l, s) GetProcAddress(l, s)
+#define DL_CLOSE_FUNC(l) FreeLibrary(l)
+static HMODULE cudaLib;
+static HMODULE nvEncLib;
+#else
+#define LOAD_FUNC(l, s) dlsym(l, s)
+#define DL_CLOSE_FUNC(l) dlclose(l)
+static void *cudaLib;
+static void *nvEncLib;
+#endif
+
+#define ifav_log(...) if (avctx) { av_log(__VA_ARGS__); }
+
+#define CHECK_LOAD_FUNC(t, f, s) \
+{ \
+    f = (t)LOAD_FUNC(cudaLib, s); \
+    if (!f) { \
+        ifav_log(avctx, AV_LOG_FATAL, "Failed loading %s from CUDA library\n", s); \
+        goto error; \
+    } \
+}
+
+static int nvenc_dyload_cuda(AVCodecContext *avctx)
+{
+    if (cudaLib)
+        return 1;
+
+#if defined(_WIN32)
+    cudaLib = LoadLibrary(TEXT("nvcuda.dll"));
+#elif defined(__CYGWIN__)
+    cudaLib = dlopen("nvcuda.dll", RTLD_LAZY);
+#else
+    cudaLib = dlopen("libcuda.so", RTLD_LAZY);
+#endif
+
+    if (!cudaLib) {
+        ifav_log(avctx, AV_LOG_FATAL, "Failed loading CUDA library\n");
+        goto error;
+    }
+
+    CHECK_LOAD_FUNC(PCUINIT, ff_cuInit, "cuInit");
+    CHECK_LOAD_FUNC(PCUDEVICEGETCOUNT, ff_cuDeviceGetCount, "cuDeviceGetCount");
+    CHECK_LOAD_FUNC(PCUDEVICEGET, ff_cuDeviceGet, "cuDeviceGet");
+    CHECK_LOAD_FUNC(PCUDEVICEGETNAME, ff_cuDeviceGetName, "cuDeviceGetName");
+    CHECK_LOAD_FUNC(PCUDEVICECOMPUTECAPABILITY, ff_cuDeviceComputeCapability, "cuDeviceComputeCapability");
+    CHECK_LOAD_FUNC(PCUCTXCREATE, ff_cuCtxCreate, "cuCtxCreate_v2");
+    CHECK_LOAD_FUNC(PCUCTXPOPCURRENT, ff_cuCtxPopCurrent, "cuCtxPopCurrent_v2");
+    CHECK_LOAD_FUNC(PCUCTXDESTROY, ff_cuCtxDestroy, "cuCtxDestroy_v2");
+
+    return 1;
+
+error:
+
+    if (cudaLib)
+        DL_CLOSE_FUNC(cudaLib);
+
+    cudaLib = 0;
+
+    return 0;
+}
+
+static int checkCudaErrors(AVCodecContext *avctx, CUresult err, const char *func)
+{
+    if (err != CUDA_SUCCESS) {
+        ifav_log(avctx, AV_LOG_FATAL, ">> %s - failed with error code 0x%x\n", func, err);
+        return 0;
+    }
+    return 1;
+}
+#define checkCudaErrors(f) if (!checkCudaErrors(avctx, f, #f)) goto error
+
+static int nvenc_check_cuda(AVCodecContext *avctx)
+{
+    int deviceCount = 0;
+    CUdevice cuDevice = 0;
+    char gpu_name[128];
+    int SMminor = 0, SMmajor = 0;
+    int i, smver;
+
+    if (!nvenc_dyload_cuda(avctx))
+        return 0;
+
+    if (ff_iNvencDeviceCount > 0)
+        return 1;
+
+    checkCudaErrors(ff_cuInit(0));
+
+    checkCudaErrors(ff_cuDeviceGetCount(&deviceCount));
+
+    if (!deviceCount) {
+        ifav_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
+        goto error;
+    }
+
+    ifav_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", deviceCount);
+
+    ff_iNvencDeviceCount = 0;
+
+    for (i = 0; i < deviceCount; ++i) {
+        checkCudaErrors(ff_cuDeviceGet(&cuDevice, i));
+        checkCudaErrors(ff_cuDeviceGetName(gpu_name, 128, cuDevice));
+        checkCudaErrors(ff_cuDeviceComputeCapability(&SMmajor, &SMminor, cuDevice));
+
+        smver = (SMmajor << 4) | SMminor;
+
+        ifav_log(avctx, AV_LOG_VERBOSE, "[ GPU #%d - < %s > has Compute SM %d.%d, NVENC %s ]\n", i, gpu_name, SMmajor, SMminor, (smver >= 0x30) ? "Available" : "Not Available");
+
+        if (smver >= 0x30)
+            ff_pNvencDevices[ff_iNvencDeviceCount++] = cuDevice;
+    }
+
+    if (!ff_iNvencDeviceCount) {
+        ifav_log(avctx, AV_LOG_FATAL, "No NVENC capable devices found\n");
+        goto error;
+    }
+
+    return 1;
+
+error:
+
+    ff_iNvencDeviceCount = 0;
+
+    return 0;
+}
+
+av_cold int ff_nvenc_dyload_nvenc(AVCodecContext *avctx)
+{
+    PNVENCODEAPICREATEINSTANCE nvEncodeAPICreateInstance = 0;
+    NVENCSTATUS nvstatus;
+
+    if (!nvenc_check_cuda(avctx))
+        return 0;
+
+    if (ff_pNvEnc) {
+        nvenc_init_count++;
+        return 1;
+    }
+
+#if defined(_WIN32)
+    if (sizeof(void*) == 8) {
+        nvEncLib = LoadLibrary(TEXT("nvEncodeAPI64.dll"));
+    } else {
+        nvEncLib = LoadLibrary(TEXT("nvEncodeAPI.dll"));
+    }
+#elif defined(__CYGWIN__)
+    if (sizeof(void*) == 8) {
+        nvEncLib = dlopen("nvEncodeAPI64.dll", RTLD_LAZY);
+    } else {
+        nvEncLib = dlopen("nvEncodeAPI.dll", RTLD_LAZY);
+    }
+#else
+    nvEncLib = dlopen("libnvidia-encode.so", RTLD_LAZY);
+#endif
+
+    if (!nvEncLib) {
+        ifav_log(avctx, AV_LOG_FATAL, "Failed loading the nvenc library\n");
+        goto error;
+    }
+
+    nvEncodeAPICreateInstance = (PNVENCODEAPICREATEINSTANCE)LOAD_FUNC(nvEncLib, "NvEncodeAPICreateInstance");
+
+    if (!nvEncodeAPICreateInstance) {
+        ifav_log(avctx, AV_LOG_FATAL, "Failed to load nvenc entrypoint\n");
+        goto error;
+    }
+
+    ff_pNvEnc = &nvEncFuncs;
+    memset(ff_pNvEnc, 0, sizeof(NV_ENCODE_API_FUNCTION_LIST));
+    ff_pNvEnc->version = NV_ENCODE_API_FUNCTION_LIST_VER;
+
+    nvstatus = nvEncodeAPICreateInstance(ff_pNvEnc);
+
+    if (nvstatus != NV_ENC_SUCCESS) {
+        ifav_log(avctx, AV_LOG_FATAL, "Failed to create nvenc instance\n");
+        goto error;
+    }
+
+    ifav_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n");
+
+    nvenc_init_count = 1;
+
+    return 1;
+
+error:
+    if (nvEncLib)
+        DL_CLOSE_FUNC(nvEncLib);
+
+    nvEncLib = 0;
+    ff_pNvEnc = 0;
+    nvenc_init_count = 0;
+
+    return 0;
+}
+
+av_cold void ff_nvenc_unload_nvenc(AVCodecContext *avctx)
+{
+    if (nvenc_init_count <= 0)
+        return;
+
+    nvenc_init_count--;
+
+    if (nvenc_init_count > 0)
+        return;
+
+    DL_CLOSE_FUNC(nvEncLib);
+    nvEncLib = 0;
+    ff_pNvEnc = 0;
+
+    ff_iNvencDeviceCount = 0;
+
+    DL_CLOSE_FUNC(cudaLib);
+    cudaLib = 0;
+
+    ff_cuInit = 0;
+    ff_cuDeviceGetCount = 0;
+    ff_cuDeviceGet = 0;
+    ff_cuDeviceGetName = 0;
+    ff_cuDeviceComputeCapability = 0;
+    ff_cuCtxCreate = 0;
+    ff_cuCtxPopCurrent = 0;
+    ff_cuCtxDestroy = 0;
+
+    ifav_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n");
+}
diff --git a/libavcodec/nvenc_api.h b/libavcodec/nvenc_api.h
new file mode 100644
index 0000000..16b1c72
--- /dev/null
+++ b/libavcodec/nvenc_api.h
@@ -0,0 +1,35 @@
+/*
+ * H.264 hardware encoding using nvidia nvenc
+ * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_NVENC_API_H
+#define AVCODEC_NVENC_API_H
+
+#include <nvEncodeAPI.h>
+
+
+typedef NVENCSTATUS (NVENCAPI* PNVENCODEAPICREATEINSTANCE)(NV_ENCODE_API_FUNCTION_LIST *functionList);
+
+extern NV_ENCODE_API_FUNCTION_LIST *ff_pNvEnc;
+
+int ff_nvenc_dyload_nvenc(AVCodecContext *avctx);
+void ff_nvenc_unload_nvenc(AVCodecContext *avctx);
+
+#endif
diff --git a/libavcodec/nvenc_cuda.h b/libavcodec/nvenc_cuda.h
new file mode 100644
index 0000000..ae43a22
--- /dev/null
+++ b/libavcodec/nvenc_cuda.h
@@ -0,0 +1,62 @@
+/*
+ * H.264 hardware encoding using nvidia nvenc
+ * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_NVENC_CUDA_H
+#define AVCODEC_NVENC_CUDA_H
+
+typedef enum cudaError_enum {
+    CUDA_SUCCESS = 0
+} CUresult;
+typedef int CUdevice;
+typedef void* CUcontext;
+
+#ifdef _WIN32
+#define CUDAAPI __stdcall
+#else
+#define CUDAAPI
+#endif
+
+typedef CUresult(CUDAAPI *PCUINIT)(unsigned int Flags);
+typedef CUresult(CUDAAPI *PCUDEVICEGETCOUNT)(int *count);
+typedef CUresult(CUDAAPI *PCUDEVICEGET)(CUdevice *device, int ordinal);
+typedef CUresult(CUDAAPI *PCUDEVICEGETNAME)(char *name, int len, CUdevice dev);
+typedef CUresult(CUDAAPI *PCUDEVICECOMPUTECAPABILITY)(int *major, int *minor, CUdevice dev);
+typedef CUresult(CUDAAPI *PCUCTXCREATE)(CUcontext *pctx, unsigned int flags, CUdevice dev);
+typedef CUresult(CUDAAPI *PCUCTXPOPCURRENT)(CUcontext *pctx);
+typedef CUresult(CUDAAPI *PCUCTXDESTROY)(CUcontext ctx);
+
+extern PCUINIT ff_cuInit;
+extern PCUDEVICEGETCOUNT ff_cuDeviceGetCount;
+extern PCUDEVICEGET ff_cuDeviceGet;
+extern PCUDEVICEGETNAME ff_cuDeviceGetName;
+extern PCUDEVICECOMPUTECAPABILITY ff_cuDeviceComputeCapability;
+extern PCUCTXCREATE ff_cuCtxCreate;
+extern PCUCTXPOPCURRENT ff_cuCtxPopCurrent;
+extern PCUCTXDESTROY ff_cuCtxDestroy;
+
+int ff_nvenc_dyload_cuda(AVCodecContext *avctx);
+int ff_nvenc_check_cuda(AVCodecContext *avctx);
+
+extern int ff_iNvencDeviceCount;
+extern CUdevice ff_pNvencDevices[16];
+extern unsigned int ff_iNvencUseDeviceID;
+
+#endif

Attachment: signature.asc
Description: OpenPGP digital signature

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to