I refactored the code even further; a new, tested patch is attached and is also available on GitHub.

The patch is on top of my other patch that fixes the order in configure.
commit 2b26ed5562911f39a52ef00942003a52ea06640b
Author: Timo Rothenpieler <t...@rothenpieler.org>
Date:   Wed Nov 26 11:08:11 2014 +0100

    Add NVENC encoder

diff --git a/Changelog b/Changelog
index 7172d0c..d26b7fa 100644
--- a/Changelog
+++ b/Changelog
@@ -17,6 +17,7 @@ version <next>:
 - WebP muxer with animated WebP support
 - zygoaudio decoding support
 - APNG demuxer
+- nvenc encoder
 
 
 version 2.4:
diff --git a/configure b/configure
index 8ecc7fa..96e842c 100755
--- a/configure
+++ b/configure
@@ -261,6 +261,7 @@ External library support:
   --enable-libzvbi         enable teletext support via libzvbi [no]
   --disable-lzma           disable lzma [autodetect]
   --enable-decklink        enable Blackmagick DeckLink I/O support [no]
+  --enable-nvenc           enable NVIDIA NVENC support [no]
   --enable-openal          enable OpenAL 1.1 capture support [no]
   --enable-opencl          enable OpenCL code
   --enable-opengl          enable OpenGL rendering [no]
@@ -1393,6 +1394,7 @@ EXTERNAL_LIBRARY_LIST="
     libzmq
     libzvbi
     lzma
+    nvenc
     openal
     opencl
     opengl
@@ -2389,6 +2391,7 @@ libxvid_encoder_deps="libxvid"
 libutvideo_decoder_deps="libutvideo"
 libutvideo_encoder_deps="libutvideo"
 libzvbi_teletext_decoder_deps="libzvbi"
+nvenc_encoder_deps="nvenc"
 
 # demuxers / muxers
 ac3_demuxer_select="ac3_parser"
@@ -4341,6 +4344,7 @@ die_license_disabled gpl x11grab
 
 die_license_disabled nonfree libaacplus
 die_license_disabled nonfree libfaac
+die_license_disabled nonfree nvenc
 enabled gpl && die_license_disabled_gpl nonfree libfdk_aac
 enabled gpl && die_license_disabled_gpl nonfree openssl
 
@@ -4650,6 +4654,7 @@ fi
 frei0r_filter_extralibs='$ldl'
 frei0r_src_filter_extralibs='$ldl'
 ladspa_filter_extralibs='$ldl'
+nvenc_encoder_extralibs='$ldl'
 
 if ! disabled network; then
     check_func getaddrinfo $network_extralibs
@@ -4914,6 +4919,7 @@ enabled libxavs           && require libxavs xavs.h xavs_encoder_encode -lxavs
 enabled libxvid           && require libxvid xvid.h xvid_global -lxvidcore
 enabled libzmq            && require_pkg_config libzmq zmq.h zmq_ctx_new
 enabled libzvbi           && require libzvbi libzvbi.h vbi_decoder_new -lzvbi
+enabled nvenc             && { check_header nvEncodeAPI.h || die "ERROR: nvEncodeAPI.h not found."; }
 enabled openal            && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32"; do
                                check_lib 'AL/al.h' alGetError "${al_libs}" && break; done } ||
                                die "ERROR: openal not found"; } &&
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index fa0f53d..cc393f9 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -347,6 +347,7 @@ OBJS-$(CONFIG_MXPEG_DECODER)           += mxpegdec.o
 OBJS-$(CONFIG_NELLYMOSER_DECODER)      += nellymoserdec.o nellymoser.o
 OBJS-$(CONFIG_NELLYMOSER_ENCODER)      += nellymoserenc.o nellymoser.o
 OBJS-$(CONFIG_NUV_DECODER)             += nuv.o rtjpeg.o
+OBJS-$(CONFIG_NVENC_ENCODER)           += nvenc.o
 OBJS-$(CONFIG_ON2AVC_DECODER)          += on2avc.o on2avcdata.o
 OBJS-$(CONFIG_OPUS_DECODER)            += opusdec.o opus.o opus_celt.o \
                                           opus_imdct.o opus_silk.o     \
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 0d39d33..8ceee2f 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -223,6 +223,7 @@ void avcodec_register_all(void)
     REGISTER_DECODER(MVC2,              mvc2);
     REGISTER_DECODER(MXPEG,             mxpeg);
     REGISTER_DECODER(NUV,               nuv);
+    REGISTER_ENCODER(NVENC,             nvenc);
     REGISTER_DECODER(PAF_VIDEO,         paf_video);
     REGISTER_ENCDEC (PAM,               pam);
     REGISTER_ENCDEC (PBM,               pbm);
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
new file mode 100644
index 0000000..ba98ee2
--- /dev/null
+++ b/libavcodec/nvenc.c
@@ -0,0 +1,1173 @@
+/*
+ * H.264 hardware encoding using nvidia nvenc
+ * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#if defined(_WIN32) && !defined(__CYGWIN__)
+#include <windows.h>
+#else
+#include <dlfcn.h>
+#endif
+
+#include <nvEncodeAPI.h>
+
+#include "libavutil/internal.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/avassert.h"
+#include "libavutil/opt.h"
+#include "libavutil/mem.h"
+#include "avcodec.h"
+#include "internal.h"
+#include "thread.h"
+
+/*
+ * Platform glue for the libraries that are loaded at run time:
+ *   CUDAAPI          calling convention used by the CUDA function pointer
+ *                    typedefs declared below
+ *   LOAD_FUNC(l, s)  look up symbol s in the library handle l
+ *   DL_CLOSE_FUNC(l) release the library handle l
+ */
+#if defined(_WIN32) && !defined(__CYGWIN__)
+#define CUDAAPI __stdcall
+#define LOAD_FUNC(l, s) GetProcAddress(l, s)
+#define DL_CLOSE_FUNC(l) FreeLibrary(l)
+#else
+#define CUDAAPI
+#define LOAD_FUNC(l, s) dlsym(l, s)
+#define DL_CLOSE_FUNC(l) dlclose(l)
+#endif
+
+/*
+ * Local declarations for the CUDA entry points used by this file, so that no
+ * CUDA header is needed at build time. Only the success code of the result
+ * enum is declared; every other value is treated as a failure.
+ */
+typedef enum cudaError_enum {
+    CUDA_SUCCESS = 0
+} CUresult;
+typedef int CUdevice;
+typedef void* CUcontext;
+
+/* Function pointer types for the symbols resolved by nvenc_dyload_cuda(). */
+typedef CUresult(CUDAAPI *PCUINIT)(unsigned int Flags);
+typedef CUresult(CUDAAPI *PCUDEVICEGETCOUNT)(int *count);
+typedef CUresult(CUDAAPI *PCUDEVICEGET)(CUdevice *device, int ordinal);
+typedef CUresult(CUDAAPI *PCUDEVICEGETNAME)(char *name, int len, CUdevice dev);
+typedef CUresult(CUDAAPI *PCUDEVICECOMPUTECAPABILITY)(int *major, int *minor, CUdevice dev);
+typedef CUresult(CUDAAPI *PCUCTXCREATE)(CUcontext *pctx, unsigned int flags, CUdevice dev);
+typedef CUresult(CUDAAPI *PCUCTXPOPCURRENT)(CUcontext *pctx);
+typedef CUresult(CUDAAPI *PCUCTXDESTROY)(CUcontext ctx);
+
+/* Type of the "NvEncodeAPICreateInstance" entry point of the nvenc library. */
+typedef NVENCSTATUS (NVENCAPI* PNVENCODEAPICREATEINSTANCE)(NV_ENCODE_API_FUNCTION_LIST *functionList);
+
+/* All-zero GUID, passed as the client key when the encode session is opened. */
+static const GUID dummy_license = { 0x0, 0x0, 0x0, { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 } };
+
+/* One input buffer created with nvEncCreateInputBuffer. */
+typedef struct NvencInputSurface
+{
+    NV_ENC_INPUT_PTR input_surface; /* handle returned by nvEncCreateInputBuffer */
+    int width;                      /* allocated width (frame width rounded up to a multiple of 32) */
+    int height;                     /* allocated height (frame height rounded up to a multiple of 32) */
+
+    int lockCount;                  /* set to 0 at init; presumably counts users of the surface - confirm */
+
+    NV_ENC_BUFFER_FORMAT format;    /* buffer format chosen from the codec context's pix_fmt */
+} NvencInputSurface;
+
+/* One bitstream buffer created with nvEncCreateBitstreamBuffer. */
+typedef struct NvencOutputSurface
+{
+    NV_ENC_OUTPUT_PTR output_surface; /* handle returned by nvEncCreateBitstreamBuffer */
+    int size;                         /* size requested at creation time */
+
+    NvencInputSurface *input_surface; /* presumably the input surface encoded into this output - confirm */
+
+    int busy;                         /* set to 0 at init; presumably non-zero while in use - confirm */
+} NvencOutputSurface;
+
+/*
+ * Node of a singly linked FIFO of output surfaces; see
+ * out_surf_queue_enqueue() and out_surf_queue_dequeue().
+ * The node does not own the surface it points to.
+ */
+typedef struct NvencOutputSurfaceList
+{
+    NvencOutputSurface *surface;
+    struct NvencOutputSurfaceList *next;
+} NvencOutputSurfaceList;
+
+/*
+ * Node of a singly linked list of timestamps that is kept in ascending order
+ * by timestamp_list_insert_sorted().
+ */
+typedef struct NvencTimestampList
+{
+    int64_t timestamp;
+    struct NvencTimestampList *next;
+} NvencTimestampList;
+
+/* Handles and entry points of the CUDA and nvenc libraries loaded at run time. */
+typedef struct NvencDynLoadFunctions
+{
+    /* CUDA entry points, filled in by nvenc_dyload_cuda() */
+    PCUINIT cu_init;
+    PCUDEVICEGETCOUNT cu_device_get_count;
+    PCUDEVICEGET cu_device_get;
+    PCUDEVICEGETNAME cu_device_get_name;
+    PCUDEVICECOMPUTECAPABILITY cu_device_compute_capability;
+    PCUCTXCREATE cu_ctx_create;
+    PCUCTXPOPCURRENT cu_ctx_pop_current;
+    PCUCTXDESTROY cu_ctx_destroy;
+
+    /* nvenc function table, filled in by nvenc_dyload_nvenc() */
+    NV_ENCODE_API_FUNCTION_LIST nvenc_funcs;
+    /* number of valid entries in nvenc_devices */
+    int nvenc_device_count;
+    /* CUDA devices whose compute capability is at least 3.0, see nvenc_check_cuda() */
+    CUdevice nvenc_devices[16];
+
+    /* library handles; NULL while the library is not loaded */
+#if defined(_WIN32) && !defined(__CYGWIN__)
+    HMODULE cuda_lib;
+    HMODULE nvenc_lib;
+#else
+    void *cuda_lib;
+    void *nvenc_lib;
+#endif
+} NvencDynLoadFunctions;
+
+/* Private codec context of the nvenc encoder (AVCodecContext.priv_data). */
+typedef struct NvencContext
+{
+    AVClass *avclass;
+
+    NvencDynLoadFunctions nvenc_dload_funcs;
+
+    NV_ENC_INITIALIZE_PARAMS init_encode_params; /* passed to nvEncInitializeEncoder */
+    NV_ENC_CONFIG encode_config;                 /* preset config with user overrides applied */
+    CUcontext cu_context;                        /* CUDA context the encode session is opened on */
+
+    int max_surface_count;                       /* number of entries in both surface arrays */
+    NvencInputSurface *input_surfaces;
+    NvencOutputSurface *output_surfaces;
+
+    NvencOutputSurfaceList *output_surface_queue;
+    NvencOutputSurfaceList *output_surface_ready_queue;
+    NvencTimestampList *timestamp_list;
+    int64_t last_dts;                            /* AV_NOPTS_VALUE after init */
+
+    void *nvencoder;                             /* encode session handle, NULL when not open */
+
+    /* Values read by nvenc_encode_init(); presumably set through AVOptions - confirm */
+    char *preset;   /* one of: hp, hq, bd, ll, llhp, llhq, default */
+    int cbr;        /* non-zero selects a constant bitrate rate control mode */
+    int twopass;    /* with cbr: non-zero (or a low latency preset) selects 2-pass quality mode */
+    int gobpattern; /* when >= 0, frameIntervalP is forced to 1 */
+    int gpu;        /* index into nvenc_dload_funcs.nvenc_devices */
+} NvencContext;
+
+/*
+ * Resolve symbol `sym` from the CUDA library and store it, cast to `type`,
+ * in `fn`. Must be expanded in a scope that provides `avctx`, `dl_fn` and an
+ * `error` label; the label is jumped to after logging when the lookup fails.
+ */
+#define CHECK_LOAD_FUNC(type, fn, sym) \
+do { \
+    if (!((fn) = (type)LOAD_FUNC(dl_fn->cuda_lib, sym))) { \
+        av_log(avctx, AV_LOG_FATAL, "Failed loading %s from CUDA library\n", sym); \
+        goto error; \
+    } \
+} while (0)
+
+/**
+ * Load the CUDA library and resolve the entry points used by this file.
+ *
+ * Returns immediately when the library handle is already set. If the library
+ * or any symbol cannot be loaded, the library is closed again and the handle
+ * is reset to NULL.
+ *
+ * @param avctx codec context whose priv_data is the NvencContext to fill in
+ * @return 1 on success, 0 on failure
+ */
+static av_cold int nvenc_dyload_cuda(AVCodecContext *avctx)
+{
+    NvencContext *nvctx = avctx->priv_data;
+    NvencDynLoadFunctions *dl_fn = &nvctx->nvenc_dload_funcs;
+
+    /* Already loaded by an earlier call. */
+    if (dl_fn->cuda_lib)
+        return 1;
+
+#if defined(_WIN32)
+    dl_fn->cuda_lib = LoadLibrary(TEXT("nvcuda.dll"));
+#elif defined(__CYGWIN__)
+    dl_fn->cuda_lib = dlopen("nvcuda.dll", RTLD_LAZY);
+#else
+    dl_fn->cuda_lib = dlopen("libcuda.so", RTLD_LAZY);
+#endif
+
+    /* Nothing to close yet, so bail out directly. */
+    if (!dl_fn->cuda_lib) {
+        av_log(avctx, AV_LOG_FATAL, "Failed loading CUDA library\n");
+        return 0;
+    }
+
+    /* Each lookup jumps to "error" when the symbol is missing. */
+    CHECK_LOAD_FUNC(PCUINIT,                    dl_fn->cu_init,                      "cuInit");
+    CHECK_LOAD_FUNC(PCUDEVICEGETCOUNT,          dl_fn->cu_device_get_count,          "cuDeviceGetCount");
+    CHECK_LOAD_FUNC(PCUDEVICEGET,               dl_fn->cu_device_get,                "cuDeviceGet");
+    CHECK_LOAD_FUNC(PCUDEVICEGETNAME,           dl_fn->cu_device_get_name,           "cuDeviceGetName");
+    CHECK_LOAD_FUNC(PCUDEVICECOMPUTECAPABILITY, dl_fn->cu_device_compute_capability, "cuDeviceComputeCapability");
+    CHECK_LOAD_FUNC(PCUCTXCREATE,               dl_fn->cu_ctx_create,                "cuCtxCreate_v2");
+    CHECK_LOAD_FUNC(PCUCTXPOPCURRENT,           dl_fn->cu_ctx_pop_current,           "cuCtxPopCurrent_v2");
+    CHECK_LOAD_FUNC(PCUCTXDESTROY,              dl_fn->cu_ctx_destroy,               "cuCtxDestroy_v2");
+
+    return 1;
+
+error:
+    /* The library itself was loaded, only a symbol lookup failed. */
+    DL_CLOSE_FUNC(dl_fn->cuda_lib);
+    dl_fn->cuda_lib = NULL;
+
+    return 0;
+}
+
+/**
+ * Check the result of a CUDA call and log it when it failed.
+ *
+ * @param avctx logging context
+ * @param err   result returned by the CUDA call
+ * @param func  text describing the call, used in the log message
+ * @return 1 if err is CUDA_SUCCESS, 0 otherwise
+ */
+static av_cold int check_cuda_errors(AVCodecContext *avctx, CUresult err, const char *func)
+{
+    if (err != CUDA_SUCCESS) {
+        av_log(avctx, AV_LOG_FATAL, ">> %s - failed with error code 0x%x\n", func, err);
+        return 0;
+    }
+    return 1;
+}
+/*
+ * Statement wrapper around the function above: evaluates the CUDA call f and
+ * jumps to the local "error" label when it fails. Requires `avctx` and an
+ * `error` label in the expanding scope. Wrapped in do/while(0) so that it
+ * behaves as a single statement, also in unbraced if/else bodies.
+ */
+#define check_cuda_errors(f) \
+do { \
+    if (!check_cuda_errors(avctx, f, #f)) \
+        goto error; \
+} while (0)
+
+/**
+ * Initialize CUDA and collect the devices that can be used for NVENC.
+ *
+ * Loads the CUDA library if necessary, then enumerates all CUDA devices and
+ * stores those with a compute capability of at least 3.0 in
+ * NvencDynLoadFunctions.nvenc_devices. Returns immediately when devices were
+ * already collected by an earlier call.
+ *
+ * @param avctx codec context whose priv_data is the NvencContext to fill in
+ * @return 1 if at least one usable device was found, 0 otherwise
+ */
+static av_cold int nvenc_check_cuda(AVCodecContext *avctx)
+{
+    int device_count = 0;
+    CUdevice cu_device = 0;
+    char gpu_name[128];
+    int smminor = 0, smmajor = 0;
+    int i, smver;
+
+    NvencContext *ctx = avctx->priv_data;
+    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
+    /* capacity of the fixed-size device table */
+    const int max_devices = sizeof(dl_fn->nvenc_devices) / sizeof(dl_fn->nvenc_devices[0]);
+
+    if (!nvenc_dyload_cuda(avctx))
+        return 0;
+
+    if (dl_fn->nvenc_device_count > 0)
+        return 1;
+
+    check_cuda_errors(dl_fn->cu_init(0));
+
+    check_cuda_errors(dl_fn->cu_device_get_count(&device_count));
+
+    if (!device_count) {
+        av_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
+        goto error;
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", device_count);
+
+    dl_fn->nvenc_device_count = 0;
+
+    for (i = 0; i < device_count; ++i) {
+        check_cuda_errors(dl_fn->cu_device_get(&cu_device, i));
+        check_cuda_errors(dl_fn->cu_device_get_name(gpu_name, sizeof(gpu_name), cu_device));
+        check_cuda_errors(dl_fn->cu_device_compute_capability(&smmajor, &smminor, cu_device));
+
+        /* pack major.minor into one value so that 3.0 compares as 0x30 */
+        smver = (smmajor << 4) | smminor;
+
+        av_log(avctx, AV_LOG_VERBOSE, "[ GPU #%d - < %s > has Compute SM %d.%d, NVENC %s ]\n", i, gpu_name, smmajor, smminor, (smver >= 0x30) ? "Available" : "Not Available");
+
+        if (smver < 0x30)
+            continue;
+
+        /* never write past the end of the device table */
+        if (dl_fn->nvenc_device_count >= max_devices) {
+            av_log(avctx, AV_LOG_WARNING, "Ignoring GPU #%d, only %d NVENC capable devices are supported\n", i, max_devices);
+            continue;
+        }
+
+        dl_fn->nvenc_devices[dl_fn->nvenc_device_count++] = cu_device;
+    }
+
+    if (!dl_fn->nvenc_device_count) {
+        av_log(avctx, AV_LOG_FATAL, "No NVENC capable devices found\n");
+        goto error;
+    }
+
+    return 1;
+
+error:
+
+    dl_fn->nvenc_device_count = 0;
+
+    return 0;
+}
+
+/**
+ * Load the nvenc library and fill in the nvenc function table.
+ *
+ * CUDA is checked first through nvenc_check_cuda(). Returns immediately when
+ * the nvenc library handle is already set. On failure the nvenc library is
+ * closed again and its handle reset to NULL.
+ *
+ * @param avctx codec context whose priv_data is the NvencContext to fill in
+ * @return 1 on success, 0 on failure
+ */
+static av_cold int nvenc_dyload_nvenc(AVCodecContext *avctx)
+{
+    NvencContext *nvctx = avctx->priv_data;
+    NvencDynLoadFunctions *dl_fn = &nvctx->nvenc_dload_funcs;
+    PNVENCODEAPICREATEINSTANCE create_instance = 0;
+
+    if (!nvenc_check_cuda(avctx))
+        return 0;
+
+    /* Already loaded by an earlier call. */
+    if (dl_fn->nvenc_lib)
+        return 1;
+
+    /* The library name depends on the platform and on the pointer size. */
+#if defined(_WIN32)
+    dl_fn->nvenc_lib = LoadLibrary(sizeof(void*) == 8 ? TEXT("nvEncodeAPI64.dll")
+                                                      : TEXT("nvEncodeAPI.dll"));
+#elif defined(__CYGWIN__)
+    dl_fn->nvenc_lib = dlopen(sizeof(void*) == 8 ? "nvEncodeAPI64.dll"
+                                                 : "nvEncodeAPI.dll", RTLD_LAZY);
+#else
+    dl_fn->nvenc_lib = dlopen("libnvidia-encode.so.1", RTLD_LAZY);
+#endif
+
+    /* Nothing to close yet, so bail out directly. */
+    if (!dl_fn->nvenc_lib) {
+        av_log(avctx, AV_LOG_FATAL, "Failed loading the nvenc library\n");
+        return 0;
+    }
+
+    create_instance = (PNVENCODEAPICREATEINSTANCE)LOAD_FUNC(dl_fn->nvenc_lib, "NvEncodeAPICreateInstance");
+    if (!create_instance) {
+        av_log(avctx, AV_LOG_FATAL, "Failed to load nvenc entrypoint\n");
+        goto error;
+    }
+
+    /* The version field has to be set before the table is requested. */
+    dl_fn->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
+
+    if (create_instance(&dl_fn->nvenc_funcs) != NV_ENC_SUCCESS) {
+        av_log(avctx, AV_LOG_FATAL, "Failed to create nvenc instance\n");
+        goto error;
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n");
+
+    return 1;
+
+error:
+    /* The library itself was loaded, a later step failed. */
+    DL_CLOSE_FUNC(dl_fn->nvenc_lib);
+    dl_fn->nvenc_lib = NULL;
+
+    return 0;
+}
+
+/**
+ * Unload the nvenc and CUDA libraries and reset all state derived from them.
+ *
+ * Library handles that are NULL are skipped, matching the error paths of the
+ * loader functions, so this is safe to call when only one or neither of the
+ * libraries is loaded.
+ *
+ * @param avctx codec context whose priv_data is the NvencContext to reset
+ */
+static av_cold void nvenc_unload_nvenc(AVCodecContext *avctx)
+{
+    NvencContext *ctx = avctx->priv_data;
+    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
+
+    if (dl_fn->nvenc_lib)
+        DL_CLOSE_FUNC(dl_fn->nvenc_lib);
+    dl_fn->nvenc_lib = NULL;
+
+    /* forces nvenc_check_cuda() to enumerate the devices again */
+    dl_fn->nvenc_device_count = 0;
+
+    if (dl_fn->cuda_lib)
+        DL_CLOSE_FUNC(dl_fn->cuda_lib);
+    dl_fn->cuda_lib = NULL;
+
+    /* the entry points are dangling once the library is gone */
+    dl_fn->cu_init = NULL;
+    dl_fn->cu_device_get_count = NULL;
+    dl_fn->cu_device_get = NULL;
+    dl_fn->cu_device_get_name = NULL;
+    dl_fn->cu_device_compute_capability = NULL;
+    dl_fn->cu_ctx_create = NULL;
+    dl_fn->cu_ctx_pop_current = NULL;
+    dl_fn->cu_ctx_destroy = NULL;
+
+    av_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n");
+}
+
+/**
+ * Append a surface to the end of an output surface queue.
+ *
+ * @param head    address of the queue head pointer; *head may be NULL for an
+ *                empty queue
+ * @param surface surface to store in the new node (not copied, not owned)
+ * @return 0 on success, AVERROR(ENOMEM) if the node cannot be allocated, in
+ *         which case the queue is left unchanged
+ */
+static int out_surf_queue_enqueue(NvencOutputSurfaceList** head, NvencOutputSurface *surface)
+{
+    NvencOutputSurfaceList **link = head;
+    NvencOutputSurfaceList *node;
+
+    /* Advance to the NULL link that terminates the list. */
+    while (*link)
+        link = &(*link)->next;
+
+    node = av_malloc(sizeof(NvencOutputSurfaceList));
+    if (!node)
+        return AVERROR(ENOMEM);
+
+    node->surface = surface;
+    node->next = NULL;
+    *link = node;
+
+    return 0;
+}
+
+/**
+ * Remove the first node of an output surface queue.
+ *
+ * @param head address of the queue head pointer
+ * @return the surface stored in the removed node, or NULL if the queue is
+ *         empty
+ */
+static NvencOutputSurface *out_surf_queue_dequeue(NvencOutputSurfaceList** head)
+{
+    NvencOutputSurfaceList *first = *head;
+    NvencOutputSurface *surface;
+
+    if (!first)
+        return NULL;
+
+    surface = first->surface;
+    *head = first->next;
+    av_free(first);
+
+    return surface;
+}
+
+/**
+ * Insert a timestamp into a list that is sorted in ascending order.
+ *
+ * The new node is placed after all existing nodes whose timestamp is less
+ * than or equal to the new one.
+ *
+ * @param head      address of the list head pointer; *head may be NULL for
+ *                  an empty list
+ * @param timestamp value to insert
+ * @return 0 on success, AVERROR(ENOMEM) if the node cannot be allocated, in
+ *         which case the list is left unchanged
+ */
+static int timestamp_list_insert_sorted(NvencTimestampList** head, int64_t timestamp)
+{
+    NvencTimestampList **link = head;
+    NvencTimestampList *node;
+
+    /* Stop at the first node that is strictly greater, or at the list end. */
+    while (*link && (*link)->timestamp <= timestamp)
+        link = &(*link)->next;
+
+    node = av_malloc(sizeof(NvencTimestampList));
+    if (!node)
+        return AVERROR(ENOMEM);
+
+    /* Splice the node in front of whatever the link pointed to. */
+    node->timestamp = timestamp;
+    node->next = *link;
+    *link = node;
+
+    return 0;
+}
+
+/**
+ * Remove the first node of a timestamp list and return its value. As the
+ * list is kept in ascending order, this is the lowest timestamp stored.
+ *
+ * @param head address of the list head pointer
+ * @return the removed timestamp, or 0 if the list is empty
+ */
+static int64_t timestamp_list_get_lowest(NvencTimestampList** head)
+{
+    NvencTimestampList *first = *head;
+    int64_t lowest;
+
+    if (!first)
+        return 0;
+
+    lowest = first->timestamp;
+    *head = first->next;
+    av_free(first);
+
+    return lowest;
+}
+
+/**
+ * Initialize the encoder: load the libraries, create a CUDA context on the
+ * selected GPU, open and configure an NVENC H.264 encode session and allocate
+ * the input and output surfaces.
+ *
+ * On failure everything that was set up so far is released again, including
+ * the dynamically loaded libraries.
+ *
+ * @param avctx codec context; its priv_data is the NvencContext
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static av_cold int nvenc_encode_init(AVCodecContext *avctx)
+{
+    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS encode_session_params = { 0 };
+    NV_ENC_PRESET_CONFIG preset_config = { 0 };
+    CUcontext cu_context_curr;
+    CUresult cu_res;
+    GUID encoder_preset = NV_ENC_PRESET_HQ_GUID;
+    GUID license = dummy_license;
+    NVENCSTATUS nv_status = NV_ENC_SUCCESS;
+    int surfaceCount = 0;
+    int i, num_mbs;
+    int isLL = 0;
+    int res = 0;
+
+    NvencContext *ctx = avctx->priv_data;
+    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
+    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
+
+    if (!nvenc_dyload_nvenc(avctx))
+        return AVERROR_EXTERNAL;
+
+    /* Reset everything the error path looks at before the first "goto error". */
+    ctx->output_surface_queue = NULL;
+    ctx->output_surface_ready_queue = NULL;
+    ctx->timestamp_list = NULL;
+    ctx->last_dts = AV_NOPTS_VALUE;
+    ctx->nvencoder = NULL;
+    ctx->cu_context = NULL;
+    ctx->input_surfaces = NULL;
+    ctx->output_surfaces = NULL;
+
+    avctx->coded_frame = av_frame_alloc();
+    if (!avctx->coded_frame) {
+        res = AVERROR(ENOMEM);
+        goto error;
+    }
+
+    ctx->encode_config.version = NV_ENC_CONFIG_VER;
+    ctx->init_encode_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
+    preset_config.version = NV_ENC_PRESET_CONFIG_VER;
+    preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
+    encode_session_params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
+    encode_session_params.apiVersion = NVENCAPI_VERSION;
+    encode_session_params.clientKeyPtr = &license;
+
+    /* gpu is used as an index into nvenc_devices below */
+    if (ctx->gpu < 0 || ctx->gpu >= dl_fn->nvenc_device_count) {
+        av_log(avctx, AV_LOG_FATAL, "Requested GPU %d, but only %d GPUs are available!\n", ctx->gpu, dl_fn->nvenc_device_count);
+        res = AVERROR(EINVAL);
+        goto error;
+    }
+
+    cu_res = dl_fn->cu_ctx_create(&ctx->cu_context, 0, dl_fn->nvenc_devices[ctx->gpu]);
+
+    if (cu_res != CUDA_SUCCESS) {
+        av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC: 0x%x\n", (int)cu_res);
+        res = AVERROR_EXTERNAL;
+        goto error;
+    }
+
+    cu_res = dl_fn->cu_ctx_pop_current(&cu_context_curr);
+
+    if (cu_res != CUDA_SUCCESS) {
+        av_log(avctx, AV_LOG_FATAL, "Failed popping CUDA context: 0x%x\n", (int)cu_res);
+        res = AVERROR_EXTERNAL;
+        goto error;
+    }
+
+    encode_session_params.device = ctx->cu_context;
+    encode_session_params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
+
+    nv_status = p_nvenc->nvEncOpenEncodeSessionEx(&encode_session_params, &ctx->nvencoder);
+    if (nv_status != NV_ENC_SUCCESS) {
+        ctx->nvencoder = NULL;
+        av_log(avctx, AV_LOG_FATAL, "OpenEncodeSessionEx failed: 0x%x - invalid license key?\n", (int)nv_status);
+        res = AVERROR_EXTERNAL;
+        goto error;
+    }
+
+    /* Map the preset name to its GUID; without a name the HQ preset is used. */
+    if (ctx->preset) {
+        if (!strcmp(ctx->preset, "hp")) {
+            encoder_preset = NV_ENC_PRESET_HP_GUID;
+        } else if (!strcmp(ctx->preset, "hq")) {
+            encoder_preset = NV_ENC_PRESET_HQ_GUID;
+        } else if (!strcmp(ctx->preset, "bd")) {
+            encoder_preset = NV_ENC_PRESET_BD_GUID;
+        } else if (!strcmp(ctx->preset, "ll")) {
+            encoder_preset = NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID;
+            isLL = 1;
+        } else if (!strcmp(ctx->preset, "llhp")) {
+            encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HP_GUID;
+            isLL = 1;
+        } else if (!strcmp(ctx->preset, "llhq")) {
+            encoder_preset = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;
+            isLL = 1;
+        } else if (!strcmp(ctx->preset, "default")) {
+            encoder_preset = NV_ENC_PRESET_DEFAULT_GUID;
+        } else {
+            av_log(avctx, AV_LOG_FATAL, "Preset \"%s\" is unknown! Supported presets: hp, hq, bd, ll, llhp, llhq, default\n", ctx->preset);
+            res = AVERROR(EINVAL);
+            goto error;
+        }
+    }
+
+    nv_status = p_nvenc->nvEncGetEncodePresetConfig(ctx->nvencoder, NV_ENC_CODEC_H264_GUID, encoder_preset, &preset_config);
+    if (nv_status != NV_ENC_SUCCESS) {
+        av_log(avctx, AV_LOG_FATAL, "GetEncodePresetConfig failed: 0x%x\n", (int)nv_status);
+        res = AVERROR_EXTERNAL;
+        goto error;
+    }
+
+    ctx->init_encode_params.encodeGUID = NV_ENC_CODEC_H264_GUID;
+    ctx->init_encode_params.encodeHeight = avctx->height;
+    ctx->init_encode_params.encodeWidth = avctx->width;
+    ctx->init_encode_params.darHeight = avctx->height;
+    ctx->init_encode_params.darWidth = avctx->width;
+    ctx->init_encode_params.frameRateNum = avctx->time_base.den;
+    ctx->init_encode_params.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame;
+
+    /* Use fewer surfaces for large frames (8160 or more macroblocks). */
+    num_mbs = ((avctx->width + 15) >> 4) * ((avctx->height + 15) >> 4);
+    ctx->max_surface_count = (num_mbs >= 8160) ? 16 : 32;
+
+    ctx->init_encode_params.enableEncodeAsync = 0;
+    ctx->init_encode_params.enablePTD = 1;
+
+    ctx->init_encode_params.presetGUID = encoder_preset;
+
+    /* Start from the preset's configuration and apply the user settings on top. */
+    ctx->init_encode_params.encodeConfig = &ctx->encode_config;
+    memcpy(&ctx->encode_config, &preset_config.presetCfg, sizeof(ctx->encode_config));
+    ctx->encode_config.version = NV_ENC_CONFIG_VER;
+
+    if (avctx->gop_size >= 0) {
+        ctx->encode_config.gopLength = avctx->gop_size;
+        ctx->encode_config.encodeCodecConfig.h264Config.idrPeriod = avctx->gop_size;
+    }
+
+    if (avctx->bit_rate > 0)
+        ctx->encode_config.rcParams.averageBitRate = avctx->bit_rate;
+
+    if (avctx->rc_max_rate > 0)
+        ctx->encode_config.rcParams.maxBitRate = avctx->rc_max_rate;
+
+    /* Rate control: cbr takes precedence over constant QP, then min/max QP. */
+    if (ctx->cbr) {
+        if (!ctx->twopass) {
+            ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
+        } else if (ctx->twopass == 1 || isLL) {
+            ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_QUALITY;
+
+            ctx->encode_config.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
+            ctx->encode_config.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE;
+
+            if (!isLL)
+                av_log(avctx, AV_LOG_WARNING, "Twopass mode is only known to work with low latency (ll, llhq, llhp) presets.\n");
+        } else {
+            ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
+        }
+    } else if (avctx->global_quality > 0) {
+        ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
+        ctx->encode_config.rcParams.constQP.qpInterB = avctx->global_quality;
+        ctx->encode_config.rcParams.constQP.qpInterP = avctx->global_quality;
+        ctx->encode_config.rcParams.constQP.qpIntra = avctx->global_quality;
+
+        avctx->qmin = -1;
+        avctx->qmax = -1;
+    } else if (avctx->qmin >= 0 && avctx->qmax >= 0) {
+        ctx->encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR;
+
+        ctx->encode_config.rcParams.enableMinQP = 1;
+        ctx->encode_config.rcParams.enableMaxQP = 1;
+
+        ctx->encode_config.rcParams.minQP.qpInterB = avctx->qmin;
+        ctx->encode_config.rcParams.minQP.qpInterP = avctx->qmin;
+        ctx->encode_config.rcParams.minQP.qpIntra = avctx->qmin;
+
+        ctx->encode_config.rcParams.maxQP.qpInterB = avctx->qmax;
+        ctx->encode_config.rcParams.maxQP.qpInterP = avctx->qmax;
+        ctx->encode_config.rcParams.maxQP.qpIntra = avctx->qmax;
+    }
+
+    if (avctx->rc_buffer_size > 0)
+        ctx->encode_config.rcParams.vbvBufferSize = avctx->rc_buffer_size;
+
+    if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
+        ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD;
+    } else {
+        ctx->encode_config.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
+    }
+
+    switch (avctx->profile) {
+    case FF_PROFILE_H264_BASELINE:
+        ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID;
+        break;
+    case FF_PROFILE_H264_MAIN:
+        ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
+        break;
+    case FF_PROFILE_H264_HIGH:
+    case FF_PROFILE_UNKNOWN:
+        ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
+        break;
+    default:
+        av_log(avctx, AV_LOG_WARNING, "Unsupported h264 profile requested, falling back to high\n");
+        ctx->encode_config.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID;
+        break;
+    }
+
+    if (ctx->gobpattern >= 0) {
+        ctx->encode_config.frameIntervalP = 1;
+    }
+
+    ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag = 1;
+    ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag = 1;
+
+    ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourMatrix = avctx->colorspace;
+    ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.colourPrimaries = avctx->color_primaries;
+    ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics = avctx->color_trc;
+
+    ctx->encode_config.encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag = avctx->color_range == AVCOL_RANGE_JPEG;
+
+    /* With global headers the SPS/PPS go into extradata instead of the stream. */
+    ctx->encode_config.encodeCodecConfig.h264Config.disableSPSPPS = (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0;
+
+    nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
+    if (nv_status != NV_ENC_SUCCESS) {
+        av_log(avctx, AV_LOG_FATAL, "InitializeEncoder failed: 0x%x\n", (int)nv_status);
+        res = AVERROR_EXTERNAL;
+        goto error;
+    }
+
+    ctx->input_surfaces = av_malloc(ctx->max_surface_count * sizeof(*ctx->input_surfaces));
+
+    if (!ctx->input_surfaces) {
+        res = AVERROR(ENOMEM);
+        goto error;
+    }
+
+    ctx->output_surfaces = av_malloc(ctx->max_surface_count * sizeof(*ctx->output_surfaces));
+
+    if (!ctx->output_surfaces) {
+        res = AVERROR(ENOMEM);
+        goto error;
+    }
+
+    /*
+     * surfaceCount is the number of array entries the error path has to
+     * destroy; it only advances once an entry's input buffer exists.
+     */
+    for (surfaceCount = 0; surfaceCount < ctx->max_surface_count; ++surfaceCount) {
+        NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 };
+        NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
+        allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
+        allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
+
+        /* round the surface dimensions up to a multiple of 32 */
+        allocSurf.width = (avctx->width + 31) & ~31;
+        allocSurf.height = (avctx->height + 31) & ~31;
+
+        allocSurf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
+
+        switch (avctx->pix_fmt) {
+        case AV_PIX_FMT_YUV420P:
+            allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YV12_PL;
+            break;
+
+        case AV_PIX_FMT_NV12:
+            allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_NV12_PL;
+            break;
+
+        case AV_PIX_FMT_YUV444P:
+            allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YUV444_PL;
+            break;
+
+        default:
+            av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format\n");
+            res = AVERROR(EINVAL);
+            goto error;
+        }
+
+        nv_status = p_nvenc->nvEncCreateInputBuffer(ctx->nvencoder, &allocSurf);
+        if (nv_status != NV_ENC_SUCCESS) {
+            av_log(avctx, AV_LOG_FATAL, "CreateInputBuffer failed\n");
+            res = AVERROR_EXTERNAL;
+            goto error;
+        }
+
+        ctx->input_surfaces[surfaceCount].lockCount = 0;
+        ctx->input_surfaces[surfaceCount].input_surface = allocSurf.inputBuffer;
+        ctx->input_surfaces[surfaceCount].format = allocSurf.bufferFmt;
+        ctx->input_surfaces[surfaceCount].width = allocSurf.width;
+        ctx->input_surfaces[surfaceCount].height = allocSurf.height;
+
+        /* 1MB is large enough to hold most output frames. NVENC increases this automatically if it's not enough. */
+        allocOut.size = 1024 * 1024;
+
+        allocOut.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED;
+
+        nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut);
+        if (nv_status != NV_ENC_SUCCESS) {
+            av_log(avctx, AV_LOG_FATAL, "CreateBitstreamBuffer failed\n");
+            /* count this entry so that its input buffer gets destroyed */
+            ctx->output_surfaces[surfaceCount++].output_surface = NULL;
+            res = AVERROR_EXTERNAL;
+            goto error;
+        }
+
+        ctx->output_surfaces[surfaceCount].output_surface = allocOut.bitstreamBuffer;
+        ctx->output_surfaces[surfaceCount].size = allocOut.size;
+        ctx->output_surfaces[surfaceCount].busy = 0;
+    }
+
+    if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) {
+        uint32_t outSize = 0;
+        char tmpHeader[256];
+        NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 };
+        payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER;
+
+        payload.spsppsBuffer = tmpHeader;
+        payload.inBufferSize = sizeof(tmpHeader);
+        payload.outSPSPPSPayloadSize = &outSize;
+
+        nv_status = p_nvenc->nvEncGetSequenceParams(ctx->nvencoder, &payload);
+        if (nv_status != NV_ENC_SUCCESS) {
+            av_log(avctx, AV_LOG_FATAL, "GetSequenceParams failed\n");
+            res = AVERROR_EXTERNAL;
+            goto error;
+        }
+
+        avctx->extradata_size = outSize;
+        avctx->extradata = av_mallocz(outSize + FF_INPUT_BUFFER_PADDING_SIZE);
+
+        if (!avctx->extradata) {
+            res = AVERROR(ENOMEM);
+            goto error;
+        }
+
+        memcpy(avctx->extradata, tmpHeader, outSize);
+    }
+
+    if (ctx->encode_config.frameIntervalP > 1)
+        avctx->has_b_frames = 2;
+
+    if (ctx->encode_config.rcParams.averageBitRate > 0)
+        avctx->bit_rate = ctx->encode_config.rcParams.averageBitRate;
+
+    return 0;
+
+error:
+
+    /* surfaceCount is only non-zero once both surface arrays are allocated */
+    for (i = 0; i < surfaceCount; ++i) {
+        p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->input_surfaces[i].input_surface);
+        if (ctx->output_surfaces[i].output_surface)
+            p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->output_surfaces[i].output_surface);
+    }
+
+    av_freep(&ctx->input_surfaces);
+    av_freep(&ctx->output_surfaces);
+
+    if (ctx->nvencoder)
+        p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
+
+    if (ctx->cu_context)
+        dl_fn->cu_ctx_destroy(ctx->cu_context);
+
+    av_frame_free(&avctx->coded_frame);
+
+    nvenc_unload_nvenc(avctx);
+
+    ctx->nvencoder = NULL;
+    ctx->cu_context = NULL;
+
+    return res;
+}
+
+/**
+ * Release everything that nvenc_encode_init() set up: pending list entries,
+ * the input/output surfaces and their arrays, the encode session, the CUDA
+ * context, the loaded libraries and the coded frame.
+ *
+ * The session and context handles are used without NULL checks, so this
+ * expects a context on which nvenc_encode_init() succeeded.
+ *
+ * @param avctx codec context; its priv_data is the NvencContext
+ * @return always 0
+ */
+static av_cold int nvenc_encode_close(AVCodecContext *avctx)
+{
+    NvencContext *ctx = avctx->priv_data;
+    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
+    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
+    int i;
+
+    /* Drain the lists; the helpers free one node per call. */
+    while (ctx->timestamp_list)
+        timestamp_list_get_lowest(&ctx->timestamp_list);
+
+    while (ctx->output_surface_ready_queue)
+        out_surf_queue_dequeue(&ctx->output_surface_ready_queue);
+
+    while (ctx->output_surface_queue)
+        out_surf_queue_dequeue(&ctx->output_surface_queue);
+
+    for (i = 0; i < ctx->max_surface_count; ++i) {
+        p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->input_surfaces[i].input_surface);
+        p_nvenc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->output_surfaces[i].output_surface);
+    }
+    ctx->max_surface_count = 0;
+
+    /* The arrays themselves were allocated with av_malloc() in init. */
+    av_freep(&ctx->input_surfaces);
+    av_freep(&ctx->output_surfaces);
+
+    p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
+    ctx->nvencoder = NULL;
+
+    dl_fn->cu_ctx_destroy(ctx->cu_context);
+    ctx->cu_context = NULL;
+
+    nvenc_unload_nvenc(avctx);
+
+    av_frame_free(&avctx->coded_frame);
+
+    return 0;
+}
+
+/**
+ * Fetch the encoded bitstream of one finished output surface into a packet.
+ *
+ * Locks the surface's bitstream buffer, copies its contents into pkt,
+ * unlocks it again, and fills in the packet key flag, the picture type of
+ * coded_frame and the pts/dts of the packet.
+ *
+ * Exactly one entry is removed from ctx->timestamp_list on every path: on
+ * success it becomes the dts, on failure it is discarded.
+ *
+ * @param avctx       codec context; priv_data is the NvencContext
+ * @param pkt         packet that receives the bitstream
+ * @param coded_frame frame whose pict_type is updated
+ * @param tmpoutsurf  output surface to read the bitstream from
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, AVFrame *coded_frame, NvencOutputSurface *tmpoutsurf)
+{
+    NvencContext *ctx = avctx->priv_data;
+    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
+    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
+
+    uint32_t *slice_offsets = av_mallocz(ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData * sizeof(*slice_offsets));
+    NV_ENC_LOCK_BITSTREAM lock_params = { 0 };
+    NVENCSTATUS nv_status;
+    int res = 0;
+
+    if (!slice_offsets) {
+        /* Take the common error path so the pending timestamp is dropped too. */
+        res = AVERROR(ENOMEM);
+        goto error;
+    }
+
+    lock_params.version = NV_ENC_LOCK_BITSTREAM_VER;
+
+    lock_params.doNotWait = 0;
+    lock_params.outputBitstream = tmpoutsurf->output_surface;
+    lock_params.sliceOffsets = slice_offsets;
+
+    nv_status = p_nvenc->nvEncLockBitstream(ctx->nvencoder, &lock_params);
+    if (nv_status != NV_ENC_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed locking bitstream buffer\n");
+        res = AVERROR_EXTERNAL;
+        goto error;
+    }
+
+    res = ff_alloc_packet2(avctx, pkt, lock_params.bitstreamSizeInBytes);
+    if (res) {
+        /* The buffer is locked at this point, so release it before bailing out. */
+        p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
+        goto error;
+    }
+
+    memcpy(pkt->data, lock_params.bitstreamBufferPtr, lock_params.bitstreamSizeInBytes);
+
+    /* The data is already copied, so an unlock failure is only reported. */
+    nv_status = p_nvenc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->output_surface);
+    if (nv_status != NV_ENC_SUCCESS)
+        av_log(avctx, AV_LOG_ERROR, "Failed unlocking bitstream buffer, expect the gates of mordor to open\n");
+
+    switch (lock_params.pictureType) {
+    case NV_ENC_PIC_TYPE_IDR:
+        pkt->flags |= AV_PKT_FLAG_KEY;
+        /* fallthrough: an IDR picture is also reported as an I picture */
+    case NV_ENC_PIC_TYPE_I:
+        coded_frame->pict_type = AV_PICTURE_TYPE_I;
+        break;
+    case NV_ENC_PIC_TYPE_P:
+        coded_frame->pict_type = AV_PICTURE_TYPE_P;
+        break;
+    case NV_ENC_PIC_TYPE_B:
+        coded_frame->pict_type = AV_PICTURE_TYPE_B;
+        break;
+    case NV_ENC_PIC_TYPE_BI:
+        coded_frame->pict_type = AV_PICTURE_TYPE_BI;
+        break;
+    default:
+        coded_frame->pict_type = AV_PICTURE_TYPE_NONE;
+        break;
+    }
+
+    pkt->pts = lock_params.outputTimeStamp;
+    pkt->dts = timestamp_list_get_lowest(&ctx->timestamp_list);
+
+    /* dts must never be later than pts ... */
+    if (pkt->dts > pkt->pts)
+        pkt->dts = pkt->pts;
+
+    /* ... and must be strictly increasing from packet to packet. */
+    if (ctx->last_dts != AV_NOPTS_VALUE && pkt->dts <= ctx->last_dts)
+        pkt->dts = ctx->last_dts + 1;
+
+    ctx->last_dts = pkt->dts;
+
+    av_free(slice_offsets);
+
+    return 0;
+
+error:
+
+    /* av_free(NULL) is a no-op, so this also covers the allocation failure. */
+    av_free(slice_offsets);
+    /* Discard the timestamp that belonged to the packet we could not produce. */
+    timestamp_list_get_lowest(&ctx->timestamp_list);
+
+    return res;
+}
+
+/**
+ * Copy the planes of a software frame into a locked NVENC input buffer.
+ *
+ * The destination layout depends on avctx->pix_fmt:
+ *  - YUV420P: luma, then plane 2, then plane 1, chroma at half pitch
+ *    (presumably a YV12-style surface -- confirm against the init code)
+ *  - NV12:    luma, then the interleaved chroma plane at full pitch
+ *  - YUV444P: three full-size planes in order 0, 1, 2
+ * Plane offsets are derived from inSurf->height, copy sizes from avctx.
+ *
+ * @param avctx  codec context supplying pix_fmt, width and height
+ * @param frame  source frame
+ * @param inSurf input surface the locked buffer belongs to
+ * @param buf    start of the locked buffer
+ * @param pitch  line pitch of the locked buffer
+ * @return 0 on success, AVERROR(EINVAL) for an unsupported pixel format
+ */
+static int nvenc_upload_planes(AVCodecContext *avctx, const AVFrame *frame,
+    const NvencInputSurface *inSurf, uint8_t *buf, uint32_t pitch)
+{
+    switch (avctx->pix_fmt) {
+    case AV_PIX_FMT_YUV420P:
+        av_image_copy_plane(buf, pitch,
+            frame->data[0], frame->linesize[0],
+            avctx->width, avctx->height);
+
+        buf += inSurf->height * pitch;
+
+        av_image_copy_plane(buf, pitch >> 1,
+            frame->data[2], frame->linesize[2],
+            avctx->width >> 1, avctx->height >> 1);
+
+        buf += (inSurf->height * pitch) >> 2;
+
+        av_image_copy_plane(buf, pitch >> 1,
+            frame->data[1], frame->linesize[1],
+            avctx->width >> 1, avctx->height >> 1);
+        return 0;
+
+    case AV_PIX_FMT_NV12:
+        av_image_copy_plane(buf, pitch,
+            frame->data[0], frame->linesize[0],
+            avctx->width, avctx->height);
+
+        buf += inSurf->height * pitch;
+
+        av_image_copy_plane(buf, pitch,
+            frame->data[1], frame->linesize[1],
+            avctx->width, avctx->height >> 1);
+        return 0;
+
+    case AV_PIX_FMT_YUV444P:
+        av_image_copy_plane(buf, pitch,
+            frame->data[0], frame->linesize[0],
+            avctx->width, avctx->height);
+
+        buf += inSurf->height * pitch;
+
+        av_image_copy_plane(buf, pitch,
+            frame->data[1], frame->linesize[1],
+            avctx->width, avctx->height);
+
+        buf += inSurf->height * pitch;
+
+        av_image_copy_plane(buf, pitch,
+            frame->data[2], frame->linesize[2],
+            avctx->width, avctx->height);
+        return 0;
+
+    default:
+        av_log(avctx, AV_LOG_FATAL, "Invalid pixel format!\n");
+        return AVERROR(EINVAL);
+    }
+}
+
+/**
+ * encode2 callback: submit one frame to NVENC and return at most one packet.
+ *
+ * With a non-NULL frame, a free input surface and a free output surface are
+ * picked, the frame is copied into the input surface and handed to
+ * nvEncEncodePicture(). With a NULL frame only an end-of-stream picture
+ * (NV_ENC_PIC_FLAG_EOS) is submitted.
+ *
+ * While the encoder answers NV_ENC_ERR_NEED_MORE_INPUT the output surface is
+ * parked in output_surface_queue; on NV_ENC_SUCCESS all parked surfaces plus
+ * the current one move to output_surface_ready_queue. One ready surface per
+ * call is turned into a packet.
+ *
+ * Every failure before the picture has been accepted by the encoder releases
+ * the input surface again, so a failed call does not shrink the surface pool.
+ *
+ * @param avctx      codec context; priv_data is the NvencContext
+ * @param pkt        packet that receives the bitstream when *got_packet is 1
+ * @param frame      frame to encode, or NULL to signal end of stream
+ * @param got_packet set to 1 if pkt was filled, 0 if not; untouched on error
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
+    const AVFrame *frame, int *got_packet)
+{
+    NVENCSTATUS nv_status;
+    NvencOutputSurface *tmpoutsurf;
+    NvencOutputSurface *outSurf = NULL;
+    NvencInputSurface *inSurf = NULL;
+    int res, i;
+
+    NvencContext *ctx = avctx->priv_data;
+    NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
+    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
+
+    NV_ENC_PIC_PARAMS pic_params = { 0 };
+    pic_params.version = NV_ENC_PIC_PARAMS_VER;
+
+    if (frame) {
+        NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 };
+
+        for (i = 0; i < ctx->max_surface_count; ++i) {
+            if (!ctx->input_surfaces[i].lockCount) {
+                inSurf = &ctx->input_surfaces[i];
+                break;
+            }
+        }
+
+        av_assert0(inSurf);
+
+        /* Pick the output surface up front so that running out of them is
+         * detected before any buffer is locked or copied. */
+        for (i = 0; i < ctx->max_surface_count; ++i) {
+            if (!ctx->output_surfaces[i].busy) {
+                outSurf = &ctx->output_surfaces[i];
+                break;
+            }
+        }
+
+        if (!outSurf) {
+            av_log(avctx, AV_LOG_FATAL, "No free output surface found!\n");
+            return AVERROR_EXTERNAL;
+        }
+
+        inSurf->lockCount = 1;
+
+        lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
+        lockBufferParams.inputBuffer = inSurf->input_surface;
+
+        nv_status = p_nvenc->nvEncLockInputBuffer(ctx->nvencoder, &lockBufferParams);
+        if (nv_status != NV_ENC_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed locking nvenc input buffer\n");
+            /* Report the failure instead of silently dropping the frame. */
+            res = AVERROR_EXTERNAL;
+            goto release_input;
+        }
+
+        res = nvenc_upload_planes(avctx, frame, inSurf,
+            lockBufferParams.bufferDataPtr, lockBufferParams.pitch);
+
+        /* Unlock unconditionally: the buffer is locked even if the copy failed. */
+        nv_status = p_nvenc->nvEncUnlockInputBuffer(ctx->nvencoder, inSurf->input_surface);
+        if (nv_status != NV_ENC_SUCCESS) {
+            av_log(avctx, AV_LOG_FATAL, "Failed unlocking input buffer!\n");
+            if (!res)
+                res = AVERROR_EXTERNAL;
+        }
+
+        if (res)
+            goto release_input;
+
+        outSurf->input_surface = inSurf;
+
+        pic_params.inputBuffer = inSurf->input_surface;
+        pic_params.bufferFmt = inSurf->format;
+        pic_params.inputWidth = avctx->width;
+        pic_params.inputHeight = avctx->height;
+        pic_params.outputBitstream = outSurf->output_surface;
+        pic_params.completionEvent = 0;
+
+        if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
+            if (frame->top_field_first) {
+                pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM;
+            } else {
+                pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP;
+            }
+        } else {
+            pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
+        }
+
+        pic_params.encodePicFlags = 0;
+        pic_params.inputTimeStamp = frame->pts;
+        pic_params.inputDuration = 0;
+        pic_params.codecPicParams.h264PicParams.sliceMode = ctx->encode_config.encodeCodecConfig.h264Config.sliceMode;
+        pic_params.codecPicParams.h264PicParams.sliceModeData = ctx->encode_config.encodeCodecConfig.h264Config.sliceModeData;
+        memcpy(&pic_params.rcParams, &ctx->encode_config.rcParams, sizeof(NV_ENC_RC_PARAMS));
+
+        /* Remember the pts; process_output_surface() derives the dts from it. */
+        res = timestamp_list_insert_sorted(&ctx->timestamp_list, frame->pts);
+
+        if (res)
+            goto release_input;
+    } else {
+        pic_params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
+    }
+
+    nv_status = p_nvenc->nvEncEncodePicture(ctx->nvencoder, &pic_params);
+
+    if (nv_status != NV_ENC_SUCCESS && nv_status != NV_ENC_ERR_NEED_MORE_INPUT) {
+        av_log(avctx, AV_LOG_ERROR, "EncodePicture failed!\n");
+        res = AVERROR_EXTERNAL;
+
+        if (frame) {
+            /* No packet will ever come out for this frame: drop one pending
+             * timestamp to keep the list in step with the frames in flight. */
+            timestamp_list_get_lowest(&ctx->timestamp_list);
+            goto release_input;
+        }
+
+        return res;
+    }
+
+    if (nv_status == NV_ENC_ERR_NEED_MORE_INPUT) {
+        /* The encoder is still buffering; park the surface until it reports success. */
+        if (frame) {
+            res = out_surf_queue_enqueue(&ctx->output_surface_queue, outSurf);
+
+            if (res)
+                return res;
+
+            outSurf->busy = 1;
+        }
+    } else {
+        /* Success: everything parked so far is ready, followed by the current surface. */
+        while (ctx->output_surface_queue) {
+            tmpoutsurf = out_surf_queue_dequeue(&ctx->output_surface_queue);
+            res = out_surf_queue_enqueue(&ctx->output_surface_ready_queue, tmpoutsurf);
+
+            if (res)
+                return res;
+        }
+
+        if (frame) {
+            res = out_surf_queue_enqueue(&ctx->output_surface_ready_queue, outSurf);
+
+            if (res)
+                return res;
+
+            outSurf->busy = 1;
+        }
+    }
+
+    if (ctx->output_surface_ready_queue) {
+        tmpoutsurf = out_surf_queue_dequeue(&ctx->output_surface_ready_queue);
+
+        res = process_output_surface(avctx, pkt, avctx->coded_frame, tmpoutsurf);
+
+        /* The surface has left the ready queue for good, so hand it and its
+         * input surface back to the pool even if reading it failed. */
+        tmpoutsurf->busy = 0;
+        av_assert0(tmpoutsurf->input_surface->lockCount);
+        tmpoutsurf->input_surface->lockCount--;
+
+        if (res)
+            return res;
+
+        *got_packet = 1;
+    } else {
+        *got_packet = 0;
+    }
+
+    return 0;
+
+release_input:
+
+    /* Only reached with a frame; give the unused input surface back. */
+    if (inSurf)
+        inSurf->lockCount = 0;
+
+    return res;
+}
+
+/* Pixel formats advertised through AVCodec.pix_fmts; the list is terminated
+ * by AV_PIX_FMT_NONE. Read-only table, hence const. */
+static const enum AVPixelFormat pix_fmts_nvenc[] = {
+    AV_PIX_FMT_NV12,
+    AV_PIX_FMT_NONE
+};
+
+#define OFFSET(x) offsetof(NvencContext, x) /* byte offset of field x inside the private NvencContext */
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM /* flags shared by every option in the table below */
+static const AVOption options[] = { /* private encoder options; each entry stores its value at OFFSET(field) */
+    { "preset", "Set the encoding preset (one of hq, hp, bd, ll, llhq, llhp, default)", OFFSET(preset), AV_OPT_TYPE_STRING, { .str = "hq" }, 0, 0, VE },
+    { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
+    { "2pass", "Use 2pass cbr encoding mode (low latency mode only)", OFFSET(twopass), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE }, /* default -1; presumably "decide automatically" -- confirm in init */
+    { "goppattern", "Specifies the GOP pattern as follows: 0: I, 1: IPP, 2: IBP, 3: IBBP", OFFSET(gobpattern), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 3, VE }, /* NOTE: option is "goppattern" but the context field is spelled "gobpattern" */
+    { "gpu", "Selects which NVENC capable GPU to use. First GPU is 0, second is 1, and so on.", OFFSET(gpu), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
+    { NULL } /* table terminator */
+};
+
+static const AVClass nvenc_class = { /* AVClass that exposes the options[] table; referenced by ff_nvenc_encoder.priv_class */
+    .class_name = "nvenc",
+    .item_name = av_default_item_name,
+    .option = options,
+    .version = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVCodecDefault nvenc_defaults[] = { /* key/value defaults for generic codec options; referenced by ff_nvenc_encoder.defaults */
+    { "b", "0" }, /* presumably the generic bitrate option, 0 meaning "not set by the user" -- confirm against init */
+    { "qmin", "-1" }, /* the q* options all default to -1 here; presumably "unset" -- confirm against init */
+    { "qmax", "-1" },
+    { "qdiff", "-1" },
+    { "qblur", "-1" },
+    { "qcomp", "-1" },
+    { NULL }, /* table terminator */
+};
+
+AVCodec ff_nvenc_encoder = { /* codec descriptor tying the "nvenc" H.264 encoder callbacks, options and defaults together */
+    .name = "nvenc",
+    .long_name = NULL_IF_CONFIG_SMALL("Nvidia NVENC h264 encoder"),
+    .type = AVMEDIA_TYPE_VIDEO,
+    .id = AV_CODEC_ID_H264,
+    .priv_data_size = sizeof(NvencContext), /* avctx->priv_data is used as an NvencContext by all callbacks */
+    .init = nvenc_encode_init,
+    .encode2 = nvenc_encode_frame,
+    .close = nvenc_encode_close,
+    .capabilities = CODEC_CAP_DELAY, /* nvenc_encode_frame() may output no packet for a frame and accepts frame == NULL (EOS) */
+    .priv_class = &nvenc_class,
+    .defaults = nvenc_defaults,
+    .pix_fmts = pix_fmts_nvenc, /* only NV12 is listed, although nvenc_encode_frame() also has copy paths for YUV420P and YUV444P */
+};

Attachment: signature.asc
Description: OpenPGP digital signature

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to