Please mark it as non-free; the header clearly says that it must not be used for open-source software.
It only says not to distribute the header; the compiled binary and the code using it should be free to distribute.
Please use tools/patcheck on your patchfile: it shows many issues, most of which you should be able to fix quickly. Skip the ones that make no sense to you; the tool is not error-free. (Tabs and trailing whitespace cannot be committed.)
Fixed now; the new patch is attached and is also on GitHub.
From 4239af3cf66cdaa9ad99386bf728af4c1d1aca8a Mon Sep 17 00:00:00 2001 From: Timo Rothenpieler <t...@rothenpieler.org> Date: Wed, 26 Nov 2014 11:08:11 +0100 Subject: [PATCH] Add NVENC encoder --- Changelog | 1 + configure | 11 +- libavcodec/Makefile | 1 + libavcodec/allcodecs.c | 1 + libavcodec/nvenc.c | 932 ++++++++++++++++++++++++++++++++++++++++++++++++ libavcodec/nvenc_api.c | 275 ++++++++++++++ libavcodec/nvenc_api.h | 35 ++ libavcodec/nvenc_cuda.h | 62 ++++ 8 files changed, 1316 insertions(+), 2 deletions(-) create mode 100644 libavcodec/nvenc.c create mode 100644 libavcodec/nvenc_api.c create mode 100644 libavcodec/nvenc_api.h create mode 100644 libavcodec/nvenc_cuda.h diff --git a/Changelog b/Changelog index 7172d0c..d26b7fa 100644 --- a/Changelog +++ b/Changelog @@ -17,6 +17,7 @@ version <next>: - WebP muxer with animated WebP support - zygoaudio decoding support - APNG demuxer +- nvenc encoder version 2.4: diff --git a/configure b/configure index 38619c4..d0b790c 100755 --- a/configure +++ b/configure @@ -261,6 +261,7 @@ External library support: --enable-libzvbi enable teletext support via libzvbi [no] --disable-lzma disable lzma [autodetect] --enable-decklink enable Blackmagick DeckLink I/O support [no] + --enable-nvenc enable NVIDIA NVENC support [no] --enable-openal enable OpenAL 1.1 capture support [no] --enable-opencl enable OpenCL code --enable-opengl enable OpenGL rendering [no] @@ -1393,6 +1394,7 @@ EXTERNAL_LIBRARY_LIST=" libzmq libzvbi lzma + nvenc openal opencl opengl @@ -2389,6 +2391,7 @@ libxvid_encoder_deps="libxvid" libutvideo_decoder_deps="libutvideo" libutvideo_encoder_deps="libutvideo" libzvbi_teletext_decoder_deps="libzvbi" +nvenc_encoder_deps="nvenc" # demuxers / muxers ac3_demuxer_select="ac3_parser" @@ -2569,9 +2572,7 @@ drawtext_filter_deps="libfreetype" ebur128_filter_deps="gpl" flite_filter_deps="libflite" frei0r_filter_deps="frei0r dlopen" -frei0r_filter_extralibs='$ldl' frei0r_src_filter_deps="frei0r dlopen" 
-frei0r_src_filter_extralibs='$ldl' geq_filter_deps="gpl" histeq_filter_deps="gpl" hqdn3d_filter_deps="gpl" @@ -4650,6 +4651,11 @@ elif check_func dlopen -ldl; then ldl=-ldl fi +# set a few flags which depend on ldl and can't be set earlier +nvenc_encoder_extralibs='$ldl' +frei0r_filter_extralibs='$ldl' +frei0r_src_filter_extralibs='$ldl' + if ! disabled network; then check_func getaddrinfo $network_extralibs check_func getservbyport $network_extralibs @@ -4913,6 +4919,7 @@ enabled libxavs && require libxavs xavs.h xavs_encoder_encode -lxavs enabled libxvid && require libxvid xvid.h xvid_global -lxvidcore enabled libzmq && require_pkg_config libzmq zmq.h zmq_ctx_new enabled libzvbi && require libzvbi libzvbi.h vbi_decoder_new -lzvbi +enabled nvenc && { check_header nvEncodeAPI.h || die "ERROR: nvEncodeAPI.h not found."; } enabled openal && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32"; do check_lib 'AL/al.h' alGetError "${al_libs}" && break; done } || die "ERROR: openal not found"; } && diff --git a/libavcodec/Makefile b/libavcodec/Makefile index fa0f53d..cc41564 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -347,6 +347,7 @@ OBJS-$(CONFIG_MXPEG_DECODER) += mxpegdec.o OBJS-$(CONFIG_NELLYMOSER_DECODER) += nellymoserdec.o nellymoser.o OBJS-$(CONFIG_NELLYMOSER_ENCODER) += nellymoserenc.o nellymoser.o OBJS-$(CONFIG_NUV_DECODER) += nuv.o rtjpeg.o +OBJS-$(CONFIG_NVENC_ENCODER) += nvenc.o nvenc_api.o OBJS-$(CONFIG_ON2AVC_DECODER) += on2avc.o on2avcdata.o OBJS-$(CONFIG_OPUS_DECODER) += opusdec.o opus.o opus_celt.o \ opus_imdct.o opus_silk.o \ diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 0d39d33..8ceee2f 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -223,6 +223,7 @@ void avcodec_register_all(void) REGISTER_DECODER(MVC2, mvc2); REGISTER_DECODER(MXPEG, mxpeg); REGISTER_DECODER(NUV, nuv); + REGISTER_ENCODER(NVENC, nvenc); REGISTER_DECODER(PAF_VIDEO, paf_video); REGISTER_ENCDEC (PAM, pam); REGISTER_ENCDEC 
(PBM, pbm); diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c new file mode 100644 index 0000000..3cb98d3 --- /dev/null +++ b/libavcodec/nvenc.c @@ -0,0 +1,932 @@ +/* + * H.264 hardware encoding using nvidia nvenc + * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef _WIN32 +#include <windows.h> +#endif + +#include "libavutil/internal.h" +#include "libavutil/imgutils.h" +#include "libavutil/avassert.h" +#include "libavutil/opt.h" +#include "libavutil/mem.h" +#include "avcodec.h" +#include "internal.h" + +#include "nvenc_cuda.h" +#include "nvenc_api.h" + +typedef struct NvencInputSurface +{ + NV_ENC_INPUT_PTR inputSurface; + int width; + int height; + + int lockCount; + + NV_ENC_BUFFER_FORMAT format; +} NvencInputSurface; + +typedef struct NvencOutputSurface +{ + NV_ENC_OUTPUT_PTR outputSurface; + int size; + + NvencInputSurface *inputSurface; + + int busy; +} NvencOutputSurface; + +typedef struct NvencOutputSurfaceList +{ + NvencOutputSurface *surface; + struct NvencOutputSurfaceList *next; +} NvencOutputSurfaceList; + +typedef struct NvencTimestampList +{ + int64_t timestamp; + struct NvencTimestampList *next; +} NvencTimestampList; + +typedef struct NvencContext +{ + AVClass 
*avclass; + + NV_ENC_INITIALIZE_PARAMS initEncodeParams; + NV_ENC_CONFIG encodeConfig; + CUcontext cuContext; + + int maxSurfaceCount; + NvencInputSurface *inputSurfaces; + NvencOutputSurface *outputSurfaces; + + NvencOutputSurfaceList *outputSurfaceQueue; + NvencOutputSurfaceList *outputSurfaceReadyQueue; + NvencTimestampList *timestampList; + int64_t lastDts; + + void *nvencoder; + + char *profile; + char *preset; + int cqp; + int cbr; + int twopass; + int gobpattern; +} NvencContext; + +static const GUID dummy_license = { 0x0, 0x0, 0x0, { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 } }; + +static void out_surf_queue_push(NvencOutputSurfaceList** head, NvencOutputSurface *surface) +{ + if (!*head) { + *head = av_malloc(sizeof(NvencOutputSurfaceList)); + (*head)->next = 0; + (*head)->surface = surface; + return; + } + + while ((*head)->next) + head = &((*head)->next); + + (*head)->next = av_malloc(sizeof(NvencOutputSurfaceList)); + (*head)->next->next = 0; + (*head)->next->surface = surface; +} + +static NvencOutputSurface *out_surf_queue_pop(NvencOutputSurfaceList** head) +{ + NvencOutputSurfaceList *tmp; + NvencOutputSurface *res; + + if (!*head) + return 0; + + tmp = *head; + res = tmp->surface; + *head = tmp->next; + av_free(tmp); + + return res; +} + +static void timestamp_list_insert_sorted(NvencTimestampList** head, int64_t timestamp) +{ + NvencTimestampList *newelem; + NvencTimestampList *prev; + + if (!*head) { + *head = av_malloc(sizeof(NvencTimestampList)); + (*head)->next = 0; + (*head)->timestamp = timestamp; + return; + } + + prev = 0; + while (*head && timestamp >= (*head)->timestamp) { + prev = *head; + head = &((*head)->next); + } + + newelem = av_malloc(sizeof(NvencTimestampList)); + newelem->next = *head; + newelem->timestamp = timestamp; + + if (*head) { + *head = newelem; + } else { + prev->next = newelem; + } +} + +static int64_t timestamp_list_get_lowest(NvencTimestampList** head) +{ + NvencTimestampList *tmp; + int64_t res; + + if (!*head) + 
return 0; + + tmp = *head; + res = tmp->timestamp; + *head = tmp->next; + av_free(tmp); + + return res; +} + +static av_cold int nvenc_encode_init(AVCodecContext *avctx) +{ + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS stEncodeSessionParams = { 0 }; + NV_ENC_PRESET_CONFIG presetConfig = { 0 }; + CUcontext cuContextCurr; + GUID encoderPreset = NV_ENC_PRESET_HQ_GUID; + GUID license = dummy_license; + NVENCSTATUS nvStatus = NV_ENC_SUCCESS; + int surfaceCount = 0; + int i, numMBs; + int isLL = 0; + + NvencContext *ctx = avctx->priv_data; + + if (!ff_nvenc_dyload_nvenc(avctx)) + return AVERROR_EXTERNAL; + + avctx->coded_frame = av_frame_alloc(); + if (!avctx->coded_frame) + return AVERROR(ENOMEM); + + memset(&ctx->initEncodeParams, 0, sizeof(NV_ENC_INITIALIZE_PARAMS)); + memset(&ctx->encodeConfig, 0, sizeof(NV_ENC_CONFIG)); + + ctx->outputSurfaceQueue = 0; + ctx->outputSurfaceReadyQueue = 0; + ctx->timestampList = 0; + ctx->lastDts = AV_NOPTS_VALUE; + ctx->nvencoder = 0; + + ctx->encodeConfig.version = NV_ENC_CONFIG_VER; + ctx->initEncodeParams.version = NV_ENC_INITIALIZE_PARAMS_VER; + presetConfig.version = NV_ENC_PRESET_CONFIG_VER; + presetConfig.presetCfg.version = NV_ENC_CONFIG_VER; + stEncodeSessionParams.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER; + stEncodeSessionParams.apiVersion = NVENCAPI_VERSION; + stEncodeSessionParams.clientKeyPtr = &license; + + ctx->cuContext = 0; + if (ff_cuCtxCreate(&ctx->cuContext, 0, ff_pNvencDevices[ff_iNvencUseDeviceID]) != CUDA_SUCCESS + || ff_cuCtxPopCurrent(&cuContextCurr) != CUDA_SUCCESS) { + av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for NVENC\n"); + goto error; + } + + stEncodeSessionParams.device = (void*)ctx->cuContext; + stEncodeSessionParams.deviceType = NV_ENC_DEVICE_TYPE_CUDA; + + nvStatus = ff_pNvEnc->nvEncOpenEncodeSessionEx(&stEncodeSessionParams, &ctx->nvencoder); + if (nvStatus != NV_ENC_SUCCESS) { + ctx->nvencoder = 0; + av_log(avctx, AV_LOG_FATAL, "OpenEncodeSessionEx failed: 0x%x - invalid license 
key?\n", (int)nvStatus); + goto error; + } + + if (ctx->preset) { + if (!strcmp(ctx->preset, "hp")) { + encoderPreset = NV_ENC_PRESET_HP_GUID; + } else if (!strcmp(ctx->preset, "hq")) { + encoderPreset = NV_ENC_PRESET_HQ_GUID; + } else if (!strcmp(ctx->preset, "bd")) { + encoderPreset = NV_ENC_PRESET_BD_GUID; + } else if (!strcmp(ctx->preset, "ll")) { + encoderPreset = NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID; + isLL = 1; + } else if (!strcmp(ctx->preset, "llhp")) { + encoderPreset = NV_ENC_PRESET_LOW_LATENCY_HP_GUID; + isLL = 1; + } else if (!strcmp(ctx->preset, "llhq")) { + encoderPreset = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID; + isLL = 1; + } else if (!strcmp(ctx->preset, "default")) { + encoderPreset = NV_ENC_PRESET_DEFAULT_GUID; + } else { + av_log(avctx, AV_LOG_ERROR, "Preset \"%s\" is unknown!\n", ctx->preset); + } + } + + nvStatus = ff_pNvEnc->nvEncGetEncodePresetConfig(ctx->nvencoder, NV_ENC_CODEC_H264_GUID, encoderPreset, &presetConfig); + if (nvStatus != NV_ENC_SUCCESS) { + av_log(avctx, AV_LOG_FATAL, "GetEncodePresetConfig failed: 0x%x\n", (int)nvStatus); + goto error; + } + + ctx->initEncodeParams.encodeGUID = NV_ENC_CODEC_H264_GUID; + ctx->initEncodeParams.encodeHeight = avctx->height; + ctx->initEncodeParams.encodeWidth = avctx->width; + ctx->initEncodeParams.darHeight = avctx->height; + ctx->initEncodeParams.darWidth = avctx->width; + ctx->initEncodeParams.frameRateNum = avctx->time_base.den; + ctx->initEncodeParams.frameRateDen = avctx->time_base.num * avctx->ticks_per_frame; + + numMBs = ((avctx->width + 15) >> 4) * ((avctx->height + 15) >> 4); + ctx->maxSurfaceCount = (numMBs >= 8160) ? 
16 : 32; + + ctx->initEncodeParams.enableEncodeAsync = 0; + ctx->initEncodeParams.enablePTD = 1; + + ctx->initEncodeParams.presetGUID = encoderPreset; + + ctx->initEncodeParams.encodeConfig = &ctx->encodeConfig; + memcpy(&ctx->encodeConfig, &presetConfig.presetCfg, sizeof(NV_ENC_CONFIG)); + ctx->encodeConfig.version = NV_ENC_CONFIG_VER; + + if (avctx->gop_size >= 0) { + ctx->encodeConfig.gopLength = avctx->gop_size; + ctx->encodeConfig.encodeCodecConfig.h264Config.idrPeriod = avctx->gop_size; + } + + if (avctx->bit_rate > 0) + ctx->encodeConfig.rcParams.averageBitRate = avctx->bit_rate; + + if (avctx->rc_max_rate > 0) + ctx->encodeConfig.rcParams.maxBitRate = avctx->rc_max_rate; + + if (ctx->cbr) { + if (!ctx->twopass) { + ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR; + } else if (ctx->twopass == 1 || isLL) { + ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_2_PASS_QUALITY; + + ctx->encodeConfig.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE; + ctx->encodeConfig.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE; + + if (!isLL) + av_log(avctx, AV_LOG_WARNING, "Twopass mode is only known to work with low latency (ll, llhq, llhp) presets.\n"); + } else { + ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR; + } + } else if (ctx->cqp >= 0) { + ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP; + ctx->encodeConfig.rcParams.constQP.qpInterB = ctx->cqp; + ctx->encodeConfig.rcParams.constQP.qpInterP = ctx->cqp; + ctx->encodeConfig.rcParams.constQP.qpIntra = ctx->cqp; + + avctx->qmin = -1; + avctx->qmax = -1; + } else if (avctx->qmin >= 0 && avctx->qmax >= 0) { + ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR; + + ctx->encodeConfig.rcParams.enableMinQP = 1; + ctx->encodeConfig.rcParams.enableMaxQP = 1; + + ctx->encodeConfig.rcParams.minQP.qpInterB = avctx->qmin; + ctx->encodeConfig.rcParams.minQP.qpInterP = avctx->qmin; + 
ctx->encodeConfig.rcParams.minQP.qpIntra = avctx->qmin; + + ctx->encodeConfig.rcParams.maxQP.qpInterB = avctx->qmax; + ctx->encodeConfig.rcParams.maxQP.qpInterP = avctx->qmax; + ctx->encodeConfig.rcParams.maxQP.qpIntra = avctx->qmax; + } + + if (avctx->rc_buffer_size > 0) + ctx->encodeConfig.rcParams.vbvBufferSize = avctx->rc_buffer_size; + + if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) { + ctx->encodeConfig.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD; + } else { + ctx->encodeConfig.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME; + } + + if (!ctx->profile) { + switch (avctx->profile) { + case FF_PROFILE_H264_BASELINE: + ctx->profile = av_strdup("baseline"); + break; + case FF_PROFILE_H264_MAIN: + ctx->profile = av_strdup("main"); + break; + default: + ctx->profile = av_strdup("high"); + break; + } + } + + ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID; + + if (!strcmp(ctx->profile, "high")) { + ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID; + } else if (!strcmp(ctx->profile, "main")) { + ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID; + } else if (!strcmp(ctx->profile, "baseline")) { + ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID; + } else { + av_log(avctx, AV_LOG_WARNING, "Unknown profile requested: %s\n", ctx->profile); + } + + if (ctx->gobpattern >= 0) { + ctx->encodeConfig.frameIntervalP = 1; + } + + ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag = 1; + ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag = 1; + + ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.colourMatrix = avctx->colorspace; + ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.colourPrimaries = avctx->color_primaries; + ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics = avctx->color_trc; + + 
ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag = avctx->color_range == AVCOL_RANGE_JPEG; + + ctx->encodeConfig.encodeCodecConfig.h264Config.disableSPSPPS = (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) ? 1 : 0; + + nvStatus = ff_pNvEnc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->initEncodeParams); + if (nvStatus != NV_ENC_SUCCESS) { + av_log(avctx, AV_LOG_FATAL, "InitializeEncoder failed: 0x%x\n", (int)nvStatus); + goto error; + } + + ctx->inputSurfaces = (NvencInputSurface*)calloc(ctx->maxSurfaceCount, sizeof(NvencInputSurface)); + ctx->outputSurfaces = (NvencOutputSurface*)calloc(ctx->maxSurfaceCount, sizeof(NvencOutputSurface)); + + for (surfaceCount = 0; surfaceCount < ctx->maxSurfaceCount; ++surfaceCount) { + NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 }; + NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 }; + allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER; + allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER; + + allocSurf.width = (avctx->width + 31) & ~31; + allocSurf.height = (avctx->height + 31) & ~31; + + allocSurf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED; + + switch (avctx->pix_fmt) { + case AV_PIX_FMT_YUV420P: + allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YV12_PL; + break; + + case AV_PIX_FMT_NV12: + allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_NV12_PL; + break; + + case AV_PIX_FMT_YUV444P: + allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YUV444_PL; + break; + + default: + av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format\n"); + goto error; + } + + nvStatus = ff_pNvEnc->nvEncCreateInputBuffer(ctx->nvencoder, &allocSurf); + if (nvStatus = NV_ENC_SUCCESS){ + av_log(avctx, AV_LOG_FATAL, "CreateInputBuffer failed\n"); + goto error; + } + + ctx->inputSurfaces[surfaceCount].lockCount = 0; + ctx->inputSurfaces[surfaceCount].inputSurface = allocSurf.inputBuffer; + ctx->inputSurfaces[surfaceCount].format = allocSurf.bufferFmt; + ctx->inputSurfaces[surfaceCount].width = allocSurf.width; + 
ctx->inputSurfaces[surfaceCount].height = allocSurf.height; + + allocOut.size = 1024 * 1024; + allocOut.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED; + + nvStatus = ff_pNvEnc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut); + if (nvStatus = NV_ENC_SUCCESS) { + av_log(avctx, AV_LOG_FATAL, "CreateBitstreamBuffer failed\n"); + ctx->outputSurfaces[surfaceCount++].outputSurface = 0; + goto error; + } + + ctx->outputSurfaces[surfaceCount].outputSurface = allocOut.bitstreamBuffer; + ctx->outputSurfaces[surfaceCount].size = allocOut.size; + ctx->outputSurfaces[surfaceCount].busy = 0; + } + + if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) { + uint32_t outSize = 0; + char tmpHeader[256]; + NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 }; + payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER; + + payload.spsppsBuffer = tmpHeader; + payload.inBufferSize = 256; + payload.outSPSPPSPayloadSize = &outSize; + + nvStatus = ff_pNvEnc->nvEncGetSequenceParams(ctx->nvencoder, &payload); + if (nvStatus != NV_ENC_SUCCESS) { + av_log(avctx, AV_LOG_FATAL, "GetSequenceParams failed\n"); + goto error; + } + + avctx->extradata_size = outSize; + avctx->extradata = av_mallocz(outSize + FF_INPUT_BUFFER_PADDING_SIZE); + + memcpy(avctx->extradata, tmpHeader, outSize); + } else { + avctx->extradata = 0; + avctx->extradata_size = 0; + } + + if (ctx->encodeConfig.frameIntervalP > 1) + avctx->has_b_frames = 2; + + if (ctx->encodeConfig.rcParams.averageBitRate > 0) + avctx->bit_rate = ctx->encodeConfig.rcParams.averageBitRate; + + return 0; + +error: + + for (i = 0; i < surfaceCount; ++i) { + ff_pNvEnc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->inputSurfaces[i].inputSurface); + if (ctx->outputSurfaces[i].outputSurface) + ff_pNvEnc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->outputSurfaces[i].outputSurface); + } + + if (ctx->nvencoder) + ff_pNvEnc->nvEncDestroyEncoder(ctx->nvencoder); + + if (ctx->cuContext) + ff_cuCtxDestroy(ctx->cuContext); + + ff_nvenc_unload_nvenc(avctx); + + 
ctx->nvencoder = 0; + ctx->cuContext = 0; + + return AVERROR_EXTERNAL; +} + +static av_cold int nvenc_encode_close(AVCodecContext *avctx) +{ + NvencContext *ctx = avctx->priv_data; + int i; + + if (ctx->profile) + av_freep(&ctx->profile); + + if (avctx->extradata) + av_freep(&avctx->extradata); + + while (ctx->timestampList) + timestamp_list_get_lowest(&ctx->timestampList); + + while (ctx->outputSurfaceReadyQueue) + out_surf_queue_pop(&ctx->outputSurfaceReadyQueue); + + while (ctx->outputSurfaceQueue) + out_surf_queue_pop(&ctx->outputSurfaceQueue); + + for (i = 0; i < ctx->maxSurfaceCount; ++i) { + ff_pNvEnc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->inputSurfaces[i].inputSurface); + ff_pNvEnc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, ctx->outputSurfaces[i].outputSurface); + } + ctx->maxSurfaceCount = 0; + + ff_pNvEnc->nvEncDestroyEncoder(ctx->nvencoder); + ctx->nvencoder = 0; + + ff_cuCtxDestroy(ctx->cuContext); + ctx->cuContext = 0; + + ff_nvenc_unload_nvenc(avctx); + + av_frame_free(&avctx->coded_frame); + + return 0; +} + +static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, AVFrame *coded_frame, NvencOutputSurface *tmpoutsurf) +{ + NvencContext *ctx = avctx->priv_data; + uint32_t *sliceOffsets = (uint32_t*)calloc(ctx->encodeConfig.encodeCodecConfig.h264Config.sliceModeData, sizeof(uint32_t)); + NV_ENC_LOCK_BITSTREAM lockParams = { 0 }; + NVENCSTATUS nvStatus; + + lockParams.version = NV_ENC_LOCK_BITSTREAM_VER; + + lockParams.doNotWait = 0; + lockParams.outputBitstream = tmpoutsurf->outputSurface; + lockParams.sliceOffsets = sliceOffsets; + + nvStatus = ff_pNvEnc->nvEncLockBitstream(ctx->nvencoder, &lockParams); + if (nvStatus != NV_ENC_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed locking bitstream buffer\n"); + timestamp_list_get_lowest(&ctx->timestampList); + return 0; + } + + if (ff_alloc_packet2(avctx, pkt, lockParams.bitstreamSizeInBytes) < 0) { + ff_pNvEnc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->outputSurface); + 
timestamp_list_get_lowest(&ctx->timestampList); + return 0; + } + + memcpy(pkt->data, lockParams.bitstreamBufferPtr, lockParams.bitstreamSizeInBytes); + + nvStatus = ff_pNvEnc->nvEncUnlockBitstream(ctx->nvencoder, tmpoutsurf->outputSurface); + if (nvStatus != NV_ENC_SUCCESS) + av_log(avctx, AV_LOG_ERROR, "Failed unlocking bitstream buffer, expect the gates of mordor to open\n"); + + switch (lockParams.pictureType) { + case NV_ENC_PIC_TYPE_IDR: + pkt->flags |= AV_PKT_FLAG_KEY; + case NV_ENC_PIC_TYPE_I: + avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I; + break; + + case NV_ENC_PIC_TYPE_P: + avctx->coded_frame->pict_type = AV_PICTURE_TYPE_P; + break; + + case NV_ENC_PIC_TYPE_B: + avctx->coded_frame->pict_type = AV_PICTURE_TYPE_B; + break; + + case NV_ENC_PIC_TYPE_BI: + avctx->coded_frame->pict_type = AV_PICTURE_TYPE_BI; + break; + + default: + avctx->coded_frame->pict_type = AV_PICTURE_TYPE_NONE; + break; + } + + pkt->pts = lockParams.outputTimeStamp; + pkt->dts = timestamp_list_get_lowest(&ctx->timestampList) - ctx->encodeConfig.frameIntervalP; + + if (pkt->dts > pkt->pts) + pkt->dts = pkt->pts; + + if (ctx->lastDts != AV_NOPTS_VALUE && pkt->dts <= ctx->lastDts) + pkt->dts = ctx->lastDts + 1; + + ctx->lastDts = pkt->dts; + + return 1; +} + +static int nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt, + const AVFrame *frame, int *got_packet) +{ + NVENCSTATUS nvStatus; + NvencContext *ctx = avctx->priv_data; + NvencOutputSurface *tmpoutsurf; + int i = 0; + + NV_ENC_PIC_PARAMS picParams = { 0 }; + picParams.version = NV_ENC_PIC_PARAMS_VER; + + if (frame) { + NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 }; + NvencInputSurface *inSurf = 0; + + for (i = 0; i < ctx->maxSurfaceCount; ++i) + if (!ctx->inputSurfaces[i].lockCount) + inSurf = &ctx->inputSurfaces[i]; + av_assert0(inSurf); + + inSurf->lockCount = 1; + + lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER; + lockBufferParams.inputBuffer = inSurf->inputSurface; + + nvStatus = 
ff_pNvEnc->nvEncLockInputBuffer(ctx->nvencoder, &lockBufferParams); + if (nvStatus != NV_ENC_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed locking nvenc input buffer\n"); + return 0; + } + + if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) { + uint8_t *buf = lockBufferParams.bufferDataPtr; + + av_image_copy_plane(buf, lockBufferParams.pitch, + frame->data[0], frame->linesize[0], + avctx->width, avctx->height); + + buf += inSurf->height * lockBufferParams.pitch; + + av_image_copy_plane(buf, lockBufferParams.pitch >> 1, + frame->data[2], frame->linesize[2], + avctx->width >> 1, avctx->height >> 1); + + buf += (inSurf->height * lockBufferParams.pitch) >> 2; + + av_image_copy_plane(buf, lockBufferParams.pitch >> 1, + frame->data[1], frame->linesize[1], + avctx->width >> 1, avctx->height >> 1); + } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) { + uint8_t *buf = lockBufferParams.bufferDataPtr; + + av_image_copy_plane(buf, lockBufferParams.pitch, + frame->data[0], frame->linesize[0], + avctx->width, avctx->height); + + buf += inSurf->height * lockBufferParams.pitch; + + av_image_copy_plane(buf, lockBufferParams.pitch, + frame->data[1], frame->linesize[1], + avctx->width, avctx->height >> 1); + } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P) { + uint8_t *buf = lockBufferParams.bufferDataPtr; + + av_image_copy_plane(buf, lockBufferParams.pitch, + frame->data[0], frame->linesize[0], + avctx->width, avctx->height); + + buf += inSurf->height * lockBufferParams.pitch; + + av_image_copy_plane(buf, lockBufferParams.pitch, + frame->data[1], frame->linesize[1], + avctx->width, avctx->height); + + buf += inSurf->height * lockBufferParams.pitch; + + av_image_copy_plane(buf, lockBufferParams.pitch, + frame->data[2], frame->linesize[2], + avctx->width, avctx->height); + } else { + av_log(avctx, AV_LOG_FATAL, "Invalid pixel format!\n"); + return AVERROR(EINVAL); + } + + nvStatus = ff_pNvEnc->nvEncUnlockInputBuffer(ctx->nvencoder, inSurf->inputSurface); + if (nvStatus != NV_ENC_SUCCESS) { + 
av_log(avctx, AV_LOG_FATAL, "Failed unlocking input buffer!\n"); + return AVERROR_EXTERNAL; + } + + for (i = 0; i < ctx->maxSurfaceCount; ++i) + if (!ctx->outputSurfaces[i].busy) + break; + + if (i == ctx->maxSurfaceCount) { + inSurf->lockCount = 0; + av_log(avctx, AV_LOG_ERROR, "No free output surface found!\n"); + return 0; + } + + ctx->outputSurfaces[i].inputSurface = inSurf; + + picParams.inputBuffer = inSurf->inputSurface; + picParams.bufferFmt = inSurf->format; + picParams.inputWidth = avctx->width; + picParams.inputHeight = avctx->height; + picParams.outputBitstream = ctx->outputSurfaces[i].outputSurface; + picParams.completionEvent = 0; + + if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) { + if (frame->top_field_first) { + picParams.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM; + } else { + picParams.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP; + } + } else { + picParams.pictureStruct = NV_ENC_PIC_STRUCT_FRAME; + } + + picParams.encodePicFlags = 0; + picParams.inputTimeStamp = frame->pts; + picParams.inputDuration = 0; + picParams.codecPicParams.h264PicParams.sliceMode = ctx->encodeConfig.encodeCodecConfig.h264Config.sliceMode; + picParams.codecPicParams.h264PicParams.sliceModeData = ctx->encodeConfig.encodeCodecConfig.h264Config.sliceModeData; + memcpy(&picParams.rcParams, &ctx->encodeConfig.rcParams, sizeof(NV_ENC_RC_PARAMS)); + + timestamp_list_insert_sorted(&ctx->timestampList, frame->pts); + } else { + picParams.encodePicFlags = NV_ENC_PIC_FLAG_EOS; + } + + nvStatus = ff_pNvEnc->nvEncEncodePicture(ctx->nvencoder, &picParams); + + if (frame && nvStatus == NV_ENC_ERR_NEED_MORE_INPUT) { + out_surf_queue_push(&ctx->outputSurfaceQueue, &ctx->outputSurfaces[i]); + ctx->outputSurfaces[i].busy = 1; + } + + if (nvStatus != NV_ENC_SUCCESS && nvStatus != NV_ENC_ERR_NEED_MORE_INPUT) { + av_log(avctx, AV_LOG_ERROR, "EncodePicture failed!\n"); + return AVERROR_EXTERNAL; + } + + if (nvStatus != NV_ENC_ERR_NEED_MORE_INPUT) { + while 
(ctx->outputSurfaceQueue) { + tmpoutsurf = out_surf_queue_pop(&ctx->outputSurfaceQueue); + out_surf_queue_push(&ctx->outputSurfaceReadyQueue, tmpoutsurf); + } + + if (frame) { + out_surf_queue_push(&ctx->outputSurfaceReadyQueue, &ctx->outputSurfaces[i]); + ctx->outputSurfaces[i].busy = 1; + } + } + + if (ctx->outputSurfaceReadyQueue) { + tmpoutsurf = out_surf_queue_pop(&ctx->outputSurfaceReadyQueue); + + *got_packet = process_output_surface(avctx, pkt, avctx->coded_frame, tmpoutsurf); + + tmpoutsurf->busy = 0; + av_assert0(tmpoutsurf->inputSurface->lockCount); + tmpoutsurf->inputSurface->lockCount--; + } + + return 0; +} + +static int pix_fmts_nvenc_initialized; + +static enum AVPixelFormat pix_fmts_nvenc[] = { + AV_PIX_FMT_NV12, + AV_PIX_FMT_NONE, + AV_PIX_FMT_NONE, + AV_PIX_FMT_NONE +}; + +static av_cold void nvenc_init_static(AVCodec *codec) +{ + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS stEncodeSessionParams = { 0 }; + CUcontext cuctxcur = 0, cuctx = 0; + NVENCSTATUS nvStatus; + void *nvencoder = 0; + GUID encodeGuid = NV_ENC_CODEC_H264_GUID; + GUID license = dummy_license; + int i = 0, pos = 0; + int gotnv12 = 0, got420 = 0, got444 = 0; + uint32_t inputFmtCount = 32; + NV_ENC_BUFFER_FORMAT inputFmts[32]; + + for (i = 0; i < 32; ++i) + inputFmts[i] = (NV_ENC_BUFFER_FORMAT)0; + i = 0; + + if (pix_fmts_nvenc_initialized) { + codec->pix_fmts = pix_fmts_nvenc; + return; + } + + if (!ff_nvenc_dyload_nvenc(0)) { + pix_fmts_nvenc_initialized = 1; + return; + } + + stEncodeSessionParams.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER; + stEncodeSessionParams.apiVersion = NVENCAPI_VERSION; + stEncodeSessionParams.clientKeyPtr = &license; + + cuctx = 0; + if (ff_cuCtxCreate(&cuctx, 0, ff_pNvencDevices[ff_iNvencUseDeviceID]) != CUDA_SUCCESS) { + cuctx = 0; + goto error; + } + + if (ff_cuCtxPopCurrent(&cuctxcur) != CUDA_SUCCESS) + goto error; + + stEncodeSessionParams.device = (void*)cuctx; + stEncodeSessionParams.deviceType = NV_ENC_DEVICE_TYPE_CUDA; + + nvStatus = 
ff_pNvEnc->nvEncOpenEncodeSessionEx(&stEncodeSessionParams, &nvencoder); + if (nvStatus != NV_ENC_SUCCESS) { + nvencoder = 0; + goto error; + } + + nvStatus = ff_pNvEnc->nvEncGetInputFormats(nvencoder, encodeGuid, inputFmts, 32, &inputFmtCount); + if (nvStatus != NV_ENC_SUCCESS) + goto error; + + pos = 0; + for (i = 0; i < inputFmtCount && pos < 3; ++i) { + if (!gotnv12 && (inputFmts[i] == NV_ENC_BUFFER_FORMAT_NV12_PL + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_NV12_TILED16x16 + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_NV12_TILED64x16)) { + + pix_fmts_nvenc[pos++] = AV_PIX_FMT_NV12; + gotnv12 = 1; + } else if (!got420 && (inputFmts[i] == NV_ENC_BUFFER_FORMAT_YV12_PL + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YV12_TILED16x16 + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YV12_TILED64x16)) { + + pix_fmts_nvenc[pos++] = AV_PIX_FMT_YUV420P; + got420 = 1; + } else if (!got444 && (inputFmts[i] == NV_ENC_BUFFER_FORMAT_YUV444_PL + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YUV444_TILED16x16 + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YUV444_TILED64x16)) { + + pix_fmts_nvenc[pos++] = AV_PIX_FMT_YUV444P; + got444 = 1; + } + } + + pix_fmts_nvenc[pos] = AV_PIX_FMT_NONE; + + pix_fmts_nvenc_initialized = 1; + codec->pix_fmts = pix_fmts_nvenc; + + ff_pNvEnc->nvEncDestroyEncoder(nvencoder); + ff_cuCtxDestroy(cuctx); + + ff_nvenc_unload_nvenc(0); + + return; + +error: + + if (nvencoder) + ff_pNvEnc->nvEncDestroyEncoder(nvencoder); + + if (cuctx) + ff_cuCtxDestroy(cuctx); + + pix_fmts_nvenc_initialized = 1; + pix_fmts_nvenc[0] = AV_PIX_FMT_NV12; + pix_fmts_nvenc[1] = AV_PIX_FMT_NONE; + + codec->pix_fmts = pix_fmts_nvenc; + + ff_nvenc_unload_nvenc(0); +} + +#define OFFSET(x) offsetof(NvencContext, x) +#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM +static const AVOption options[] = { + { "profile", "Set profile restrictions", OFFSET(profile), AV_OPT_TYPE_STRING, { .str = "high" }, 0, 0, VE}, + { "preset", "Set the encoding preset (one of hq, hp, bd, ll, llhq, llhp, default)", 
OFFSET(preset), AV_OPT_TYPE_STRING, { .str = "hq" }, 0, 0, VE }, + { "cqp", "Constant quantization parameter rate control method", OFFSET(cqp), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE }, + { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE }, + { "2pass", "Use 2pass cbr encoding mode (low latency mode only)", OFFSET(twopass), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE }, + { "goppattern", "Specifies the GOP pattern as follows: 0: I, 1: IPP, 2: IBP, 3: IBBP", OFFSET(gobpattern), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 3, VE }, + { NULL } +}; + +static const AVClass nvenc_class = { + .class_name = "nvenc", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + +static const AVCodecDefault nvenc_defaults[] = { + { "b", "0" }, + { "qmin", "-1" }, + { "qmax", "-1" }, + { "qdiff", "-1" }, + { "qblur", "-1" }, + { "qcomp", "-1" }, + { NULL }, +}; + +AVCodec ff_nvenc_encoder = { + .name = "nvenc", + .long_name = NULL_IF_CONFIG_SMALL("Nvidia NVENC h264 encoder"), + .type = AVMEDIA_TYPE_VIDEO, + .id = AV_CODEC_ID_H264, + .priv_data_size = sizeof(NvencContext), + .init = nvenc_encode_init, + .encode2 = nvenc_encode_frame, + .close = nvenc_encode_close, + .capabilities = CODEC_CAP_DELAY, + .priv_class = &nvenc_class, + .defaults = nvenc_defaults, + .init_static_data = nvenc_init_static +}; diff --git a/libavcodec/nvenc_api.c b/libavcodec/nvenc_api.c new file mode 100644 index 0000000..53d5fa8 --- /dev/null +++ b/libavcodec/nvenc_api.c @@ -0,0 +1,275 @@ +/* + * H.264 hardware encoding using nvidia nvenc + * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Run-time loader for the CUDA driver library and the NVENC library.
+ *
+ * Both libraries are opened with LoadLibrary()/dlopen() and their entry
+ * points are published through the ff_cu* function pointers and ff_pNvEnc.
+ * Successful ff_nvenc_dyload_nvenc() calls are counted; the libraries are
+ * closed again once ff_nvenc_unload_nvenc() has been called as many times.
+ *
+ * All state in this file is global and is not protected by any lock here.
+ */
+
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <dlfcn.h>
+#endif
+
+#include <string.h>
+
+#include "libavutil/avassert.h"
+#include "avcodec.h"
+#include "internal.h"
+
+#include "nvenc_cuda.h"
+#include "nvenc_api.h"
+
+/* CUDA driver entry points, resolved by nvenc_dyload_cuda(). */
+PCUINIT ff_cuInit = 0;
+PCUDEVICEGETCOUNT ff_cuDeviceGetCount = 0;
+PCUDEVICEGET ff_cuDeviceGet = 0;
+PCUDEVICEGETNAME ff_cuDeviceGetName = 0;
+PCUDEVICECOMPUTECAPABILITY ff_cuDeviceComputeCapability = 0;
+PCUCTXCREATE ff_cuCtxCreate = 0;
+PCUCTXPOPCURRENT ff_cuCtxPopCurrent = 0;
+PCUCTXDESTROY ff_cuCtxDestroy = 0;
+
+/* Number of successful ff_nvenc_dyload_nvenc() calls not yet unloaded. */
+static int nvenc_init_count;
+/* Storage for the NVENC function table that ff_pNvEnc points to. */
+static NV_ENCODE_API_FUNCTION_LIST nvEncFuncs;
+NV_ENCODE_API_FUNCTION_LIST *ff_pNvEnc = 0;
+/* Devices with compute capability >= 3.0 found by nvenc_check_cuda(). */
+int ff_iNvencDeviceCount = 0;
+CUdevice ff_pNvencDevices[16];
+unsigned int ff_iNvencUseDeviceID = 0;
+
+#ifdef _WIN32
+#define LOAD_FUNC(l, s) GetProcAddress(l, s)
+#define DL_CLOSE_FUNC(l) FreeLibrary(l)
+static HMODULE cudaLib;
+static HMODULE nvEncLib;
+#else
+#define LOAD_FUNC(l, s) dlsym(l, s)
+#define DL_CLOSE_FUNC(l) dlclose(l)
+static void *cudaLib;
+static void *nvEncLib;
+#endif
+
+/*
+ * Log only when a codec context is available; the loader is also called
+ * with a null context. Requires a variable named avctx in scope.
+ * Wrapped in do/while so it behaves as a single statement inside if/else.
+ */
+#define ifav_log(...)                                                         \
+    do {                                                                      \
+        if (avctx)                                                            \
+            av_log(__VA_ARGS__);                                              \
+    } while (0)
+
+/*
+ * Resolve symbol s from the CUDA library into f (of function pointer type t).
+ * Jumps to the local "error" label when the symbol is missing.
+ */
+#define CHECK_LOAD_FUNC(t, f, s)                                              \
+    do {                                                                      \
+        (f) = (t)LOAD_FUNC(cudaLib, s);                                       \
+        if (!(f)) {                                                           \
+            ifav_log(avctx, AV_LOG_FATAL,                                     \
+                     "Failed loading %s from CUDA library\n", s);             \
+            goto error;                                                       \
+        }                                                                     \
+    } while (0)
+
+/**
+ * Close the CUDA library (if open) and clear everything derived from it,
+ * so that no function pointer into an unloaded library is left behind.
+ */
+static void nvenc_unload_cuda(void)
+{
+    if (cudaLib)
+        DL_CLOSE_FUNC(cudaLib);
+    cudaLib = 0;
+
+    ff_iNvencDeviceCount = 0;
+
+    ff_cuInit = 0;
+    ff_cuDeviceGetCount = 0;
+    ff_cuDeviceGet = 0;
+    ff_cuDeviceGetName = 0;
+    ff_cuDeviceComputeCapability = 0;
+    ff_cuCtxCreate = 0;
+    ff_cuCtxPopCurrent = 0;
+    ff_cuCtxDestroy = 0;
+}
+
+/**
+ * Open the CUDA driver library and resolve all ff_cu* entry points.
+ *
+ * @param avctx context used for logging, may be NULL
+ * @return 1 if the library is (already) loaded, 0 on failure
+ */
+static int nvenc_dyload_cuda(AVCodecContext *avctx)
+{
+    if (cudaLib)
+        return 1;
+
+#if defined(_WIN32)
+    cudaLib = LoadLibrary(TEXT("nvcuda.dll"));
+#elif defined(__CYGWIN__)
+    cudaLib = dlopen("nvcuda.dll", RTLD_LAZY);
+#else
+    /* Try the versioned soname first: the unversioned link is usually only
+     * present when development files are installed. */
+    cudaLib = dlopen("libcuda.so.1", RTLD_LAZY);
+    if (!cudaLib)
+        cudaLib = dlopen("libcuda.so", RTLD_LAZY);
+#endif
+
+    if (!cudaLib) {
+        ifav_log(avctx, AV_LOG_FATAL, "Failed loading CUDA library\n");
+        goto error;
+    }
+
+    CHECK_LOAD_FUNC(PCUINIT, ff_cuInit, "cuInit");
+    CHECK_LOAD_FUNC(PCUDEVICEGETCOUNT, ff_cuDeviceGetCount, "cuDeviceGetCount");
+    CHECK_LOAD_FUNC(PCUDEVICEGET, ff_cuDeviceGet, "cuDeviceGet");
+    CHECK_LOAD_FUNC(PCUDEVICEGETNAME, ff_cuDeviceGetName, "cuDeviceGetName");
+    CHECK_LOAD_FUNC(PCUDEVICECOMPUTECAPABILITY, ff_cuDeviceComputeCapability, "cuDeviceComputeCapability");
+    CHECK_LOAD_FUNC(PCUCTXCREATE, ff_cuCtxCreate, "cuCtxCreate_v2");
+    CHECK_LOAD_FUNC(PCUCTXPOPCURRENT, ff_cuCtxPopCurrent, "cuCtxPopCurrent_v2");
+    CHECK_LOAD_FUNC(PCUCTXDESTROY, ff_cuCtxDestroy, "cuCtxDestroy_v2");
+
+    return 1;
+
+error:
+    /* Also clears the entry points that were resolved before the failure. */
+    nvenc_unload_cuda();
+
+    return 0;
+}
+
+/**
+ * Report a failed CUDA driver call.
+ *
+ * @param avctx context used for logging, may be NULL
+ * @param err   result code returned by the call
+ * @param func  source text of the call, used in the log message
+ * @return 1 if err is CUDA_SUCCESS, 0 otherwise
+ */
+static int nvenc_cuda_call_ok(AVCodecContext *avctx, CUresult err, const char *func)
+{
+    if (err != CUDA_SUCCESS) {
+        ifav_log(avctx, AV_LOG_FATAL, ">> %s - failed with error code 0x%x\n", func, (unsigned)err);
+        return 0;
+    }
+    return 1;
+}
+
+/* Evaluate CUDA call f and jump to the local "error" label if it failed. */
+#define checkCudaErrors(f)                                                    \
+    do {                                                                      \
+        if (!nvenc_cuda_call_ok(avctx, f, #f))                                \
+            goto error;                                                       \
+    } while (0)
+
+/**
+ * Make sure CUDA is loaded and the list of usable devices is filled in.
+ *
+ * A device is accepted when its compute capability is at least 3.0. The
+ * enumeration is done once; later calls return immediately while
+ * ff_iNvencDeviceCount is positive.
+ *
+ * @param avctx context used for logging, may be NULL
+ * @return 1 if at least one usable device is known, 0 otherwise
+ */
+static int nvenc_check_cuda(AVCodecContext *avctx)
+{
+    int deviceCount = 0;
+    CUdevice cuDevice = 0;
+    char gpu_name[128];
+    int SMminor = 0, SMmajor = 0;
+    int i, smver;
+
+    if (!nvenc_dyload_cuda(avctx))
+        return 0;
+
+    if (ff_iNvencDeviceCount > 0)
+        return 1;
+
+    checkCudaErrors(ff_cuInit(0));
+
+    checkCudaErrors(ff_cuDeviceGetCount(&deviceCount));
+
+    if (!deviceCount) {
+        ifav_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n");
+        goto error;
+    }
+
+    ifav_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", deviceCount);
+
+    ff_iNvencDeviceCount = 0;
+
+    for (i = 0; i < deviceCount; ++i) {
+        checkCudaErrors(ff_cuDeviceGet(&cuDevice, i));
+        checkCudaErrors(ff_cuDeviceGetName(gpu_name, sizeof(gpu_name), cuDevice));
+        checkCudaErrors(ff_cuDeviceComputeCapability(&SMmajor, &SMminor, cuDevice));
+
+        /* major in the high nibble, minor in the low one: 3.0 -> 0x30 */
+        smver = (SMmajor << 4) | SMminor;
+
+        ifav_log(avctx, AV_LOG_VERBOSE, "[ GPU #%d - < %s > has Compute SM %d.%d, NVENC %s ]\n", i, gpu_name, SMmajor, SMminor, (smver >= 0x30) ? "Available" : "Not Available");
+
+        if (smver >= 0x30) {
+            /* The device table has a fixed size; never write past its end. */
+            if (ff_iNvencDeviceCount >= (int)FF_ARRAY_ELEMS(ff_pNvencDevices)) {
+                ifav_log(avctx, AV_LOG_WARNING, "Only the first %d NVENC capable devices are used\n", ff_iNvencDeviceCount);
+                break;
+            }
+            ff_pNvencDevices[ff_iNvencDeviceCount++] = cuDevice;
+        }
+    }
+
+    if (!ff_iNvencDeviceCount) {
+        ifav_log(avctx, AV_LOG_FATAL, "No NVENC capable devices found\n");
+        goto error;
+    }
+
+    return 1;
+
+error:
+
+    ff_iNvencDeviceCount = 0;
+
+    return 0;
+}
+
+/**
+ * Load CUDA and NVENC and fill in the NVENC function table.
+ *
+ * Every successful call must be balanced by ff_nvenc_unload_nvenc(). When
+ * the function table is already available only the use count is increased.
+ * On failure nothing stays loaded.
+ *
+ * @param avctx context used for logging, may be NULL
+ * @return 1 on success, 0 on failure
+ */
+av_cold int ff_nvenc_dyload_nvenc(AVCodecContext *avctx)
+{
+    PNVENCODEAPICREATEINSTANCE nvEncodeAPICreateInstance = 0;
+    NVENCSTATUS nvstatus;
+
+    if (!nvenc_check_cuda(avctx)) {
+        /* Do not keep CUDA open if nobody holds a reference. */
+        if (!ff_pNvEnc)
+            nvenc_unload_cuda();
+        return 0;
+    }
+
+    if (ff_pNvEnc) {
+        nvenc_init_count++;
+        return 1;
+    }
+
+#if defined(_WIN32)
+    if (sizeof(void*) == 8) {
+        nvEncLib = LoadLibrary(TEXT("nvEncodeAPI64.dll"));
+    } else {
+        nvEncLib = LoadLibrary(TEXT("nvEncodeAPI.dll"));
+    }
+#elif defined(__CYGWIN__)
+    if (sizeof(void*) == 8) {
+        nvEncLib = dlopen("nvEncodeAPI64.dll", RTLD_LAZY);
+    } else {
+        nvEncLib = dlopen("nvEncodeAPI.dll", RTLD_LAZY);
+    }
+#else
+    /* Versioned soname first, see nvenc_dyload_cuda(). */
+    nvEncLib = dlopen("libnvidia-encode.so.1", RTLD_LAZY);
+    if (!nvEncLib)
+        nvEncLib = dlopen("libnvidia-encode.so", RTLD_LAZY);
+#endif
+
+    if (!nvEncLib) {
+        ifav_log(avctx, AV_LOG_FATAL, "Failed loading the nvenc library\n");
+        goto error;
+    }
+
+    nvEncodeAPICreateInstance = (PNVENCODEAPICREATEINSTANCE)LOAD_FUNC(nvEncLib, "NvEncodeAPICreateInstance");
+
+    if (!nvEncodeAPICreateInstance) {
+        ifav_log(avctx, AV_LOG_FATAL, "Failed to load nvenc entrypoint\n");
+        goto error;
+    }
+
+    memset(&nvEncFuncs, 0, sizeof(nvEncFuncs));
+    nvEncFuncs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
+
+    nvstatus = nvEncodeAPICreateInstance(&nvEncFuncs);
+
+    if (nvstatus != NV_ENC_SUCCESS) {
+        ifav_log(avctx, AV_LOG_FATAL, "Failed to create nvenc instance\n");
+        goto error;
+    }
+
+    /* Publish the table only once it has been filled in successfully. */
+    ff_pNvEnc = &nvEncFuncs;
+
+    ifav_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n");
+
+    nvenc_init_count = 1;
+
+    return 1;
+
+error:
+    if (nvEncLib)
+        DL_CLOSE_FUNC(nvEncLib);
+
+    nvEncLib = 0;
+    ff_pNvEnc = 0;
+    nvenc_init_count = 0;
+
+    /* ff_pNvEnc was NULL on entry, so there is no other user of CUDA. */
+    nvenc_unload_cuda();
+
+    return 0;
+}
+
+/**
+ * Drop one reference taken by ff_nvenc_dyload_nvenc().
+ *
+ * When the last reference is dropped both libraries are closed and all
+ * published pointers are cleared. Calling it without a matching successful
+ * load is a no-op.
+ *
+ * @param avctx context used for logging, may be NULL
+ */
+av_cold void ff_nvenc_unload_nvenc(AVCodecContext *avctx)
+{
+    if (nvenc_init_count <= 0)
+        return;
+
+    nvenc_init_count--;
+
+    if (nvenc_init_count > 0)
+        return;
+
+    if (nvEncLib)
+        DL_CLOSE_FUNC(nvEncLib);
+    nvEncLib = 0;
+    ff_pNvEnc = 0;
+
+    nvenc_unload_cuda();
+
+    ifav_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n");
+}
diff --git a/libavcodec/nvenc_api.h b/libavcodec/nvenc_api.h
new file mode 100644
index 0000000..16b1c72
--- /dev/null
+++ b/libavcodec/nvenc_api.h
@@ -0,0 +1,35 @@
+/*
+ * H.264 hardware encoding using nvidia nvenc
+ * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_NVENC_API_H
+#define AVCODEC_NVENC_API_H
+
+#include <nvEncodeAPI.h>
+
+/* AVCodecContext is used in the prototypes below. */
+#include "avcodec.h"
+
+/**
+ * Type of the "NvEncodeAPICreateInstance" entry point of the NVENC library,
+ * which fills in the function table passed to it.
+ */
+typedef NVENCSTATUS (NVENCAPI* PNVENCODEAPICREATEINSTANCE)(NV_ENCODE_API_FUNCTION_LIST *functionList);
+
+/**
+ * NVENC function table. NULL until ff_nvenc_dyload_nvenc() has succeeded and
+ * again after the last matching ff_nvenc_unload_nvenc().
+ */
+extern NV_ENCODE_API_FUNCTION_LIST *ff_pNvEnc;
+
+/**
+ * Load the CUDA and NVENC libraries and set up ff_pNvEnc.
+ * Successful calls are counted and must be balanced by
+ * ff_nvenc_unload_nvenc().
+ *
+ * @param avctx context used for logging, may be NULL
+ * @return 1 on success, 0 on failure
+ */
+int ff_nvenc_dyload_nvenc(AVCodecContext *avctx);
+
+/**
+ * Release one reference obtained through ff_nvenc_dyload_nvenc(); the
+ * libraries are closed when the last reference is released.
+ *
+ * @param avctx context used for logging, may be NULL
+ */
+void ff_nvenc_unload_nvenc(AVCodecContext *avctx);
+
+#endif /* AVCODEC_NVENC_API_H */
diff --git a/libavcodec/nvenc_cuda.h b/libavcodec/nvenc_cuda.h
new file mode 100644
index 0000000..ae43a22
--- /dev/null
+++ b/libavcodec/nvenc_cuda.h
@@ -0,0 +1,62 @@
+/*
+ * H.264 hardware encoding using nvidia nvenc
+ * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Local declarations of the CUDA driver API types and entry points used by
+ * the NVENC wrapper. The functions themselves are resolved at run time by
+ * the loader in nvenc_api.c and published through the ff_cu* pointers.
+ */
+
+#ifndef AVCODEC_NVENC_CUDA_H
+#define AVCODEC_NVENC_CUDA_H
+
+/* AVCodecContext is used in the prototypes below. */
+#include "avcodec.h"
+
+/** Number of entries in ff_pNvencDevices. */
+#define NVENC_MAX_DEVICES 16
+
+/**
+ * Result code of a CUDA driver call. Only the success value is declared
+ * here; the loader treats every other value as a failure.
+ */
+typedef enum cudaError_enum {
+    CUDA_SUCCESS = 0
+} CUresult;
+typedef int CUdevice;
+typedef void* CUcontext;
+
+/* Calling convention of the CUDA driver entry points. */
+#ifdef _WIN32
+#define CUDAAPI __stdcall
+#else
+#define CUDAAPI
+#endif
+
+/* Function pointer types, one per driver entry point that is loaded. */
+typedef CUresult(CUDAAPI *PCUINIT)(unsigned int Flags);
+typedef CUresult(CUDAAPI *PCUDEVICEGETCOUNT)(int *count);
+typedef CUresult(CUDAAPI *PCUDEVICEGET)(CUdevice *device, int ordinal);
+typedef CUresult(CUDAAPI *PCUDEVICEGETNAME)(char *name, int len, CUdevice dev);
+typedef CUresult(CUDAAPI *PCUDEVICECOMPUTECAPABILITY)(int *major, int *minor, CUdevice dev);
+typedef CUresult(CUDAAPI *PCUCTXCREATE)(CUcontext *pctx, unsigned int flags, CUdevice dev);
+typedef CUresult(CUDAAPI *PCUCTXPOPCURRENT)(CUcontext *pctx);
+typedef CUresult(CUDAAPI *PCUCTXDESTROY)(CUcontext ctx);
+
+/*
+ * Resolved entry points; NULL while the CUDA library is not loaded.
+ * ff_cuCtxCreate, ff_cuCtxPopCurrent and ff_cuCtxDestroy are bound to the
+ * "_v2" symbols of the driver library.
+ */
+extern PCUINIT ff_cuInit;
+extern PCUDEVICEGETCOUNT ff_cuDeviceGetCount;
+extern PCUDEVICEGET ff_cuDeviceGet;
+extern PCUDEVICEGETNAME ff_cuDeviceGetName;
+extern PCUDEVICECOMPUTECAPABILITY ff_cuDeviceComputeCapability;
+extern PCUCTXCREATE ff_cuCtxCreate;
+extern PCUCTXPOPCURRENT ff_cuCtxPopCurrent;
+extern PCUCTXDESTROY ff_cuCtxDestroy;
+
+/*
+ * NOTE(review): nvenc_api.c only defines static nvenc_dyload_cuda() and
+ * nvenc_check_cuda(); no definition of the two ff_-prefixed functions below
+ * is visible there. Confirm they are defined elsewhere or drop them.
+ */
+int ff_nvenc_dyload_cuda(AVCodecContext *avctx);
+int ff_nvenc_check_cuda(AVCodecContext *avctx);
+
+/** Number of valid entries in ff_pNvencDevices. */
+extern int ff_iNvencDeviceCount;
+/** Devices with compute capability 3.0 or higher, in enumeration order. */
+extern CUdevice ff_pNvencDevices[NVENC_MAX_DEVICES];
+/** Presumably the index into ff_pNvencDevices to encode on - TODO confirm against nvenc.c. */
+extern unsigned int ff_iNvencUseDeviceID;
+
+#endif /* AVCODEC_NVENC_CUDA_H */
signature.asc
Description: OpenPGP digital signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel