Le sextidi 6 frimaire, an CCXXIII, Timo Rothenpieler a écrit : > It uses init_static_data to dynamically ask the nvidia driver for the > supported pixel formats instead.
It means it will try to load and init the library whenever libavcodec is used, even if this specific encoder is not used. For a library that accesses hardware devices, that may not be a good idea. Below, a few quick comments that became a lot of comments; I do not know the API itself. > From 793271822a5f52c3aed876fcedc7c6d8edd3c10c Mon Sep 17 00:00:00 2001 > From: Timo Rothenpieler <t...@rothenpieler.org> > Date: Wed, 26 Nov 2014 11:08:11 +0100 > Subject: [PATCH] Add NVENC encoder > > --- > Changelog | 1 + > configure | 12 +- > libavcodec/Makefile | 1 + > libavcodec/allcodecs.c | 1 + > libavcodec/nvenc.c | 932 > ++++++++++++++++++++++++++++++++++++++++++++++++ > libavcodec/nvenc_api.c | 275 ++++++++++++++ > libavcodec/nvenc_api.h | 35 ++ > libavcodec/nvenc_cuda.h | 62 ++++ Is it necessary to split the _api part in a separate file? The whole code is a bit large, but still manageable, and merging the files would avoid some headers overhead. > 8 files changed, 1317 insertions(+), 2 deletions(-) > create mode 100644 libavcodec/nvenc.c > create mode 100644 libavcodec/nvenc_api.c > create mode 100644 libavcodec/nvenc_api.h > create mode 100644 libavcodec/nvenc_cuda.h > > diff --git a/Changelog b/Changelog > index 7172d0c..d26b7fa 100644 > --- a/Changelog > +++ b/Changelog > @@ -17,6 +17,7 @@ version <next>: > - WebP muxer with animated WebP support > - zygoaudio decoding support > - APNG demuxer > +- nvenc encoder > > > version 2.4: > diff --git a/configure b/configure > index 38619c4..05bce5d 100755 > --- a/configure > +++ b/configure > @@ -261,6 +261,7 @@ External library support: > --enable-libzvbi enable teletext support via libzvbi [no] > --disable-lzma disable lzma [autodetect] > --enable-decklink enable Blackmagick DeckLink I/O support [no] > + --enable-nvenc enable NVIDIA NVENC support [no] > --enable-openal enable OpenAL 1.1 capture support [no] > --enable-opencl enable OpenCL code > --enable-opengl enable OpenGL rendering [no] > @@ -1393,6 +1394,7 @@ 
EXTERNAL_LIBRARY_LIST=" > libzmq > libzvbi > lzma > + nvenc > openal > opencl > opengl > @@ -2389,6 +2391,7 @@ libxvid_encoder_deps="libxvid" > libutvideo_decoder_deps="libutvideo" > libutvideo_encoder_deps="libutvideo" > libzvbi_teletext_decoder_deps="libzvbi" > +nvenc_encoder_deps="nvenc" > > # demuxers / muxers > ac3_demuxer_select="ac3_parser" > @@ -2569,9 +2572,7 @@ drawtext_filter_deps="libfreetype" > ebur128_filter_deps="gpl" > flite_filter_deps="libflite" > frei0r_filter_deps="frei0r dlopen" > -frei0r_filter_extralibs='$ldl' > frei0r_src_filter_deps="frei0r dlopen" > -frei0r_src_filter_extralibs='$ldl' > geq_filter_deps="gpl" > histeq_filter_deps="gpl" > hqdn3d_filter_deps="gpl" > @@ -4344,6 +4345,7 @@ die_license_disabled gpl x11grab > > die_license_disabled nonfree libaacplus > die_license_disabled nonfree libfaac > +die_license_disabled nonfree nvenc > enabled gpl && die_license_disabled_gpl nonfree libfdk_aac > enabled gpl && die_license_disabled_gpl nonfree openssl > > @@ -4650,6 +4652,11 @@ elif check_func dlopen -ldl; then > ldl=-ldl > fi > > +# set a few flags which depend on ldl and can't be set earlier > +nvenc_encoder_extralibs='$ldl' > +frei0r_filter_extralibs='$ldl' > +frei0r_src_filter_extralibs='$ldl' I think moving the frei0r rules is supposed to belong in a separate patch. > + > if ! 
disabled network; then > check_func getaddrinfo $network_extralibs > check_func getservbyport $network_extralibs > @@ -4913,6 +4920,7 @@ enabled libxavs && require libxavs xavs.h > xavs_encoder_encode -lxavs > enabled libxvid && require libxvid xvid.h xvid_global -lxvidcore > enabled libzmq && require_pkg_config libzmq zmq.h zmq_ctx_new > enabled libzvbi && require libzvbi libzvbi.h vbi_decoder_new -lzvbi > +enabled nvenc && { check_header nvEncodeAPI.h || die "ERROR: > nvEncodeAPI.h not found."; } > enabled openal && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" > "-lOpenAL32"; do > check_lib 'AL/al.h' alGetError "${al_libs}" > && break; done } || > die "ERROR: openal not found"; } && > diff --git a/libavcodec/Makefile b/libavcodec/Makefile > index fa0f53d..cc41564 100644 > --- a/libavcodec/Makefile > +++ b/libavcodec/Makefile > @@ -347,6 +347,7 @@ OBJS-$(CONFIG_MXPEG_DECODER) += mxpegdec.o > OBJS-$(CONFIG_NELLYMOSER_DECODER) += nellymoserdec.o nellymoser.o > OBJS-$(CONFIG_NELLYMOSER_ENCODER) += nellymoserenc.o nellymoser.o > OBJS-$(CONFIG_NUV_DECODER) += nuv.o rtjpeg.o > +OBJS-$(CONFIG_NVENC_ENCODER) += nvenc.o nvenc_api.o > OBJS-$(CONFIG_ON2AVC_DECODER) += on2avc.o on2avcdata.o > OBJS-$(CONFIG_OPUS_DECODER) += opusdec.o opus.o opus_celt.o \ > opus_imdct.o opus_silk.o \ > diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c > index 0d39d33..8ceee2f 100644 > --- a/libavcodec/allcodecs.c > +++ b/libavcodec/allcodecs.c > @@ -223,6 +223,7 @@ void avcodec_register_all(void) > REGISTER_DECODER(MVC2, mvc2); > REGISTER_DECODER(MXPEG, mxpeg); > REGISTER_DECODER(NUV, nuv); > + REGISTER_ENCODER(NVENC, nvenc); > REGISTER_DECODER(PAF_VIDEO, paf_video); > REGISTER_ENCDEC (PAM, pam); > REGISTER_ENCDEC (PBM, pbm); > diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c > new file mode 100644 > index 0000000..3cb98d3 > --- /dev/null > +++ b/libavcodec/nvenc.c > @@ -0,0 +1,932 @@ > +/* > + * H.264 hardware encoding using nvidia nvenc > + * Copyright (c) 2014 Timo 
Rothenpieler <t...@rothenpieler.org> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#ifdef _WIN32 > +#include <windows.h> > +#endif > + > +#include "libavutil/internal.h" > +#include "libavutil/imgutils.h" > +#include "libavutil/avassert.h" > +#include "libavutil/opt.h" > +#include "libavutil/mem.h" > +#include "avcodec.h" > +#include "internal.h" > + > +#include "nvenc_cuda.h" > +#include "nvenc_api.h" > + > +typedef struct NvencInputSurface > +{ > + NV_ENC_INPUT_PTR inputSurface; > + int width; > + int height; > + > + int lockCount; The usual coding style for structure members and variables in ffmpeg is names_separated_with_underscores, not uglyCamelCase. (But I believe the person who will end up maintaining the file should have the last word on this.) 
> + > + NV_ENC_BUFFER_FORMAT format; > +} NvencInputSurface; > + > +typedef struct NvencOutputSurface > +{ > + NV_ENC_OUTPUT_PTR outputSurface; > + int size; > + > + NvencInputSurface *inputSurface; > + > + int busy; > +} NvencOutputSurface; > + > +typedef struct NvencOutputSurfaceList > +{ > + NvencOutputSurface *surface; > + struct NvencOutputSurfaceList *next; > +} NvencOutputSurfaceList; > + > +typedef struct NvencTimestampList > +{ > + int64_t timestamp; > + struct NvencTimestampList *next; > +} NvencTimestampList; > + > +typedef struct NvencContext > +{ > + AVClass *avclass; > + > + NV_ENC_INITIALIZE_PARAMS initEncodeParams; > + NV_ENC_CONFIG encodeConfig; > + CUcontext cuContext; > + > + int maxSurfaceCount; > + NvencInputSurface *inputSurfaces; > + NvencOutputSurface *outputSurfaces; > + > + NvencOutputSurfaceList *outputSurfaceQueue; > + NvencOutputSurfaceList *outputSurfaceReadyQueue; > + NvencTimestampList *timestampList; > + int64_t lastDts; > + > + void *nvencoder; > + > + char *profile; > + char *preset; > + int cqp; > + int cbr; > + int twopass; > + int gobpattern; > +} NvencContext; > + > +static const GUID dummy_license = { 0x0, 0x0, 0x0, { 0x0, 0x0, 0x0, 0x0, > 0x0, 0x0, 0x0, 0x0 } }; > + > +static void out_surf_queue_push(NvencOutputSurfaceList** head, > NvencOutputSurface *surface) > +{ > + if (!*head) { > + *head = av_malloc(sizeof(NvencOutputSurfaceList)); > + (*head)->next = 0; ffmpeg code usually uses NULL for NULL pointers, not 0; other similar cases below. > + (*head)->surface = surface; > + return; > + } > + > + while ((*head)->next) > + head = &((*head)->next); This looks inefficient. Do you have an estimate of the usual size of the queue? I suggest you have a look at the dynarray (in libavutil/mem.h and dynarray.h) API. If you really need linked lists, you could probably keep the final pointer to head in the structure to avoid walking the list every time. 
> + > + (*head)->next = av_malloc(sizeof(NvencOutputSurfaceList)); av_malloc() return value needs to be checked. Other similar cases below. > + (*head)->next->next = 0; > + (*head)->next->surface = surface; > +} > + > +static NvencOutputSurface *out_surf_queue_pop(NvencOutputSurfaceList** head) If you call this one pop instead of shift, people used to Perl will be very confused. > +{ > + NvencOutputSurfaceList *tmp; > + NvencOutputSurface *res; > + > + if (!*head) > + return 0; > + > + tmp = *head; > + res = tmp->surface; > + *head = tmp->next; > + av_free(tmp); > + > + return res; > +} > + > +static void timestamp_list_insert_sorted(NvencTimestampList** head, int64_t > timestamp) Same as before: maybe dynarray would be more efficient, avoiding malloc() with its huge overhead for every insertion. Also, if the list is expected to be large, you may consider using a heap instead of a sorted list. > +{ > + NvencTimestampList *newelem; > + NvencTimestampList *prev; > + > + if (!*head) { > + *head = av_malloc(sizeof(NvencTimestampList)); > + (*head)->next = 0; > + (*head)->timestamp = timestamp; > + return; > + } > + > + prev = 0; > + while (*head && timestamp >= (*head)->timestamp) { > + prev = *head; > + head = &((*head)->next); > + } > + > + newelem = av_malloc(sizeof(NvencTimestampList)); > + newelem->next = *head; > + newelem->timestamp = timestamp; > + > + if (*head) { > + *head = newelem; > + } else { > + prev->next = newelem; > + } > +} > + > +static int64_t timestamp_list_get_lowest(NvencTimestampList** head) > +{ > + NvencTimestampList *tmp; > + int64_t res; > + > + if (!*head) > + return 0; > + > + tmp = *head; > + res = tmp->timestamp; > + *head = tmp->next; > + av_free(tmp); > + > + return res; > +} > + > +static av_cold int nvenc_encode_init(AVCodecContext *avctx) > +{ > + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS stEncodeSessionParams = { 0 }; > + NV_ENC_PRESET_CONFIG presetConfig = { 0 }; > + CUcontext cuContextCurr; > + GUID encoderPreset = 
NV_ENC_PRESET_HQ_GUID; > + GUID license = dummy_license; > + NVENCSTATUS nvStatus = NV_ENC_SUCCESS; > + int surfaceCount = 0; > + int i, numMBs; > + int isLL = 0; > + > + NvencContext *ctx = avctx->priv_data; > + > + if (!ff_nvenc_dyload_nvenc(avctx)) > + return AVERROR_EXTERNAL; > + > + avctx->coded_frame = av_frame_alloc(); > + if (!avctx->coded_frame) > + return AVERROR(ENOMEM); > + > + memset(&ctx->initEncodeParams, 0, sizeof(NV_ENC_INITIALIZE_PARAMS)); > + memset(&ctx->encodeConfig, 0, sizeof(NV_ENC_CONFIG)); Not needed: the whole structure is already set to 0 by the library. > + > + ctx->outputSurfaceQueue = 0; > + ctx->outputSurfaceReadyQueue = 0; > + ctx->timestampList = 0; > + ctx->lastDts = AV_NOPTS_VALUE; > + ctx->nvencoder = 0; > + > + ctx->encodeConfig.version = NV_ENC_CONFIG_VER; > + ctx->initEncodeParams.version = NV_ENC_INITIALIZE_PARAMS_VER; > + presetConfig.version = NV_ENC_PRESET_CONFIG_VER; > + presetConfig.presetCfg.version = NV_ENC_CONFIG_VER; > + stEncodeSessionParams.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER; > + stEncodeSessionParams.apiVersion = NVENCAPI_VERSION; > + stEncodeSessionParams.clientKeyPtr = &license; > + > + ctx->cuContext = 0; > + if (ff_cuCtxCreate(&ctx->cuContext, 0, > ff_pNvencDevices[ff_iNvencUseDeviceID]) != CUDA_SUCCESS > + || ff_cuCtxPopCurrent(&cuContextCurr) != CUDA_SUCCESS) { > + av_log(avctx, AV_LOG_FATAL, "Failed creating CUDA context for > NVENC\n"); Is there a chance of getting a more detailed error reason? 
> + goto error; > + } > + > + stEncodeSessionParams.device = (void*)ctx->cuContext; > + stEncodeSessionParams.deviceType = NV_ENC_DEVICE_TYPE_CUDA; > + > + nvStatus = ff_pNvEnc->nvEncOpenEncodeSessionEx(&stEncodeSessionParams, > &ctx->nvencoder); > + if (nvStatus != NV_ENC_SUCCESS) { > + ctx->nvencoder = 0; > + av_log(avctx, AV_LOG_FATAL, "OpenEncodeSessionEx failed: 0x%x - > invalid license key?\n", (int)nvStatus); > + goto error; > + } > + > + if (ctx->preset) { > + if (!strcmp(ctx->preset, "hp")) { > + encoderPreset = NV_ENC_PRESET_HP_GUID; > + } else if (!strcmp(ctx->preset, "hq")) { > + encoderPreset = NV_ENC_PRESET_HQ_GUID; > + } else if (!strcmp(ctx->preset, "bd")) { > + encoderPreset = NV_ENC_PRESET_BD_GUID; > + } else if (!strcmp(ctx->preset, "ll")) { > + encoderPreset = NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID; > + isLL = 1; > + } else if (!strcmp(ctx->preset, "llhp")) { > + encoderPreset = NV_ENC_PRESET_LOW_LATENCY_HP_GUID; > + isLL = 1; > + } else if (!strcmp(ctx->preset, "llhq")) { > + encoderPreset = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID; > + isLL = 1; > + } else if (!strcmp(ctx->preset, "default")) { > + encoderPreset = NV_ENC_PRESET_DEFAULT_GUID; > + } else { > + av_log(avctx, AV_LOG_ERROR, "Preset \"%s\" is unknown!\n", > ctx->preset); Should return an error. And if you use a table with the list of presets, you can dump the list. > + } > + } > + > + nvStatus = ff_pNvEnc->nvEncGetEncodePresetConfig(ctx->nvencoder, > NV_ENC_CODEC_H264_GUID, encoderPreset, &presetConfig); > + if (nvStatus != NV_ENC_SUCCESS) { > + av_log(avctx, AV_LOG_FATAL, "GetEncodePresetConfig failed: 0x%x\n", > (int)nvStatus); > + goto error; > + } > + > + ctx->initEncodeParams.encodeGUID = NV_ENC_CODEC_H264_GUID; > + ctx->initEncodeParams.encodeHeight = avctx->height; > + ctx->initEncodeParams.encodeWidth = avctx->width; > + ctx->initEncodeParams.darHeight = avctx->height; > + ctx->initEncodeParams.darWidth = avctx->width; Was this tested with anamorphic videos? 
> + ctx->initEncodeParams.frameRateNum = avctx->time_base.den; > + ctx->initEncodeParams.frameRateDen = avctx->time_base.num * > avctx->ticks_per_frame; > + > + numMBs = ((avctx->width + 15) >> 4) * ((avctx->height + 15) >> 4); > + ctx->maxSurfaceCount = (numMBs >= 8160) ? 16 : 32; > + > + ctx->initEncodeParams.enableEncodeAsync = 0; > + ctx->initEncodeParams.enablePTD = 1; > + > + ctx->initEncodeParams.presetGUID = encoderPreset; > + > + ctx->initEncodeParams.encodeConfig = &ctx->encodeConfig; > + memcpy(&ctx->encodeConfig, &presetConfig.presetCfg, > sizeof(NV_ENC_CONFIG)); > + ctx->encodeConfig.version = NV_ENC_CONFIG_VER; > + > + if (avctx->gop_size >= 0) { > + ctx->encodeConfig.gopLength = avctx->gop_size; > + ctx->encodeConfig.encodeCodecConfig.h264Config.idrPeriod = > avctx->gop_size; > + } > + > + if (avctx->bit_rate > 0) > + ctx->encodeConfig.rcParams.averageBitRate = avctx->bit_rate; > + > + if (avctx->rc_max_rate > 0) > + ctx->encodeConfig.rcParams.maxBitRate = avctx->rc_max_rate; > + > + if (ctx->cbr) { > + if (!ctx->twopass) { > + ctx->encodeConfig.rcParams.rateControlMode = > NV_ENC_PARAMS_RC_CBR; > + } else if (ctx->twopass == 1 || isLL) { > + ctx->encodeConfig.rcParams.rateControlMode = > NV_ENC_PARAMS_RC_2_PASS_QUALITY; > + > + > ctx->encodeConfig.encodeCodecConfig.h264Config.adaptiveTransformMode = > NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE; > + ctx->encodeConfig.encodeCodecConfig.h264Config.fmoMode = > NV_ENC_H264_FMO_DISABLE; > + > + if (!isLL) > + av_log(avctx, AV_LOG_WARNING, "Twopass mode is only known to > work with low latency (ll, llhq, llhp) presets.\n"); > + } else { > + ctx->encodeConfig.rcParams.rateControlMode = > NV_ENC_PARAMS_RC_CBR; > + } > + } else if (ctx->cqp >= 0) { > + ctx->encodeConfig.rcParams.rateControlMode = > NV_ENC_PARAMS_RC_CONSTQP; > + ctx->encodeConfig.rcParams.constQP.qpInterB = ctx->cqp; > + ctx->encodeConfig.rcParams.constQP.qpInterP = ctx->cqp; > + ctx->encodeConfig.rcParams.constQP.qpIntra = ctx->cqp; > + > + 
avctx->qmin = -1; > + avctx->qmax = -1; > + } else if (avctx->qmin >= 0 && avctx->qmax >= 0) { > + ctx->encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_VBR; > + > + ctx->encodeConfig.rcParams.enableMinQP = 1; > + ctx->encodeConfig.rcParams.enableMaxQP = 1; > + > + ctx->encodeConfig.rcParams.minQP.qpInterB = avctx->qmin; > + ctx->encodeConfig.rcParams.minQP.qpInterP = avctx->qmin; > + ctx->encodeConfig.rcParams.minQP.qpIntra = avctx->qmin; > + > + ctx->encodeConfig.rcParams.maxQP.qpInterB = avctx->qmax; > + ctx->encodeConfig.rcParams.maxQP.qpInterP = avctx->qmax; > + ctx->encodeConfig.rcParams.maxQP.qpIntra = avctx->qmax; > + } > + > + if (avctx->rc_buffer_size > 0) > + ctx->encodeConfig.rcParams.vbvBufferSize = avctx->rc_buffer_size; > + > + if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) { > + ctx->encodeConfig.frameFieldMode = > NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD; > + } else { > + ctx->encodeConfig.frameFieldMode = > NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME; > + } > + > + if (!ctx->profile) { > + switch (avctx->profile) { > + case FF_PROFILE_H264_BASELINE: case is usually indented at the same level as switch. > + ctx->profile = av_strdup("baseline"); Need to check the return value. But it seems you have the private option "profile" conflicting with the global option "profile", which is confusing, and possibly problematic, for users. 
> + break; > + case FF_PROFILE_H264_MAIN: > + ctx->profile = av_strdup("main"); > + break; > + default: > + ctx->profile = av_strdup("high"); > + break; > + } > + } > + > + ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID; > + > + if (!strcmp(ctx->profile, "high")) { > + ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID; > + } else if (!strcmp(ctx->profile, "main")) { > + ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID; > + } else if (!strcmp(ctx->profile, "baseline")) { > + ctx->encodeConfig.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID; > + } else { > + av_log(avctx, AV_LOG_WARNING, "Unknown profile requested: %s\n", > ctx->profile); > + } > + > + if (ctx->gobpattern >= 0) { > + ctx->encodeConfig.frameIntervalP = 1; > + } > + > + > ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag > = 1; > + > ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag > = 1; > + > + > ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.colourMatrix > = avctx->colorspace; > + > ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.colourPrimaries > = avctx->color_primaries; > + > ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics > = avctx->color_trc; > + > + > ctx->encodeConfig.encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag > = avctx->color_range == AVCOL_RANGE_JPEG; > + > + ctx->encodeConfig.encodeCodecConfig.h264Config.disableSPSPPS = > (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) ? 
1 : 0; > + > + nvStatus = ff_pNvEnc->nvEncInitializeEncoder(ctx->nvencoder, > &ctx->initEncodeParams); > + if (nvStatus != NV_ENC_SUCCESS) { > + av_log(avctx, AV_LOG_FATAL, "InitializeEncoder failed: 0x%x\n", > (int)nvStatus); > + goto error; > + } > + > + ctx->inputSurfaces = (NvencInputSurface*)calloc(ctx->maxSurfaceCount, > sizeof(NvencInputSurface)); > + ctx->outputSurfaces = (NvencOutputSurface*)calloc(ctx->maxSurfaceCount, > sizeof(NvencOutputSurface)); The cast is an ugly c++ism, and ffmpeg code recommends sizeof(*variable) instead of sizeof(Type). Do you need to use calloc instead of the corresponding av_ function? Other similar cases below. > + > + for (surfaceCount = 0; surfaceCount < ctx->maxSurfaceCount; > ++surfaceCount) { > + NV_ENC_CREATE_INPUT_BUFFER allocSurf = { 0 }; > + NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 }; > + allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER; > + allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER; > + > + allocSurf.width = (avctx->width + 31) & ~31; > + allocSurf.height = (avctx->height + 31) & ~31; > + > + allocSurf.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED; > + > + switch (avctx->pix_fmt) { > + case AV_PIX_FMT_YUV420P: > + allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YV12_PL; > + break; > + > + case AV_PIX_FMT_NV12: > + allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_NV12_PL; > + break; > + > + case AV_PIX_FMT_YUV444P: > + allocSurf.bufferFmt = NV_ENC_BUFFER_FORMAT_YUV444_PL; > + break; > + > + default: > + av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format\n"); > + goto error; > + } > + > + nvStatus = ff_pNvEnc->nvEncCreateInputBuffer(ctx->nvencoder, > &allocSurf); > + if (nvStatus = NV_ENC_SUCCESS){ > + av_log(avctx, AV_LOG_FATAL, "CreateInputBuffer failed\n"); > + goto error; > + } > + > + ctx->inputSurfaces[surfaceCount].lockCount = 0; > + ctx->inputSurfaces[surfaceCount].inputSurface = > allocSurf.inputBuffer; > + ctx->inputSurfaces[surfaceCount].format = allocSurf.bufferFmt; > + 
ctx->inputSurfaces[surfaceCount].width = allocSurf.width; > + ctx->inputSurfaces[surfaceCount].height = allocSurf.height; > + > + allocOut.size = 1024 * 1024; Maybe a comment to explain where this value comes from? > + allocOut.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_CACHED; > + > + nvStatus = ff_pNvEnc->nvEncCreateBitstreamBuffer(ctx->nvencoder, > &allocOut); > + if (nvStatus = NV_ENC_SUCCESS) { > + av_log(avctx, AV_LOG_FATAL, "CreateBitstreamBuffer failed\n"); > + ctx->outputSurfaces[surfaceCount++].outputSurface = 0; > + goto error; > + } > + > + ctx->outputSurfaces[surfaceCount].outputSurface = > allocOut.bitstreamBuffer; > + ctx->outputSurfaces[surfaceCount].size = allocOut.size; > + ctx->outputSurfaces[surfaceCount].busy = 0; > + } > + > + if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) { > + uint32_t outSize = 0; > + char tmpHeader[256]; > + NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = { 0 }; > + payload.version = NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER; > + > + payload.spsppsBuffer = tmpHeader; > + payload.inBufferSize = 256; > + payload.outSPSPPSPayloadSize = &outSize; > + > + nvStatus = ff_pNvEnc->nvEncGetSequenceParams(ctx->nvencoder, > &payload); > + if (nvStatus != NV_ENC_SUCCESS) { > + av_log(avctx, AV_LOG_FATAL, "GetSequenceParams failed\n"); > + goto error; > + } > + > + avctx->extradata_size = outSize; > + avctx->extradata = av_mallocz(outSize + > FF_INPUT_BUFFER_PADDING_SIZE); > + > + memcpy(avctx->extradata, tmpHeader, outSize); > + } else { > + avctx->extradata = 0; > + avctx->extradata_size = 0; Not needed. 
> + } > + > + if (ctx->encodeConfig.frameIntervalP > 1) > + avctx->has_b_frames = 2; > + > + if (ctx->encodeConfig.rcParams.averageBitRate > 0) > + avctx->bit_rate = ctx->encodeConfig.rcParams.averageBitRate; > + > + return 0; > + > +error: > + > + for (i = 0; i < surfaceCount; ++i) { > + ff_pNvEnc->nvEncDestroyInputBuffer(ctx->nvencoder, > ctx->inputSurfaces[i].inputSurface); > + if (ctx->outputSurfaces[i].outputSurface) > + ff_pNvEnc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, > ctx->outputSurfaces[i].outputSurface); > + } > + > + if (ctx->nvencoder) > + ff_pNvEnc->nvEncDestroyEncoder(ctx->nvencoder); > + > + if (ctx->cuContext) > + ff_cuCtxDestroy(ctx->cuContext); > + > + ff_nvenc_unload_nvenc(avctx); > + > + ctx->nvencoder = 0; > + ctx->cuContext = 0; > + > + return AVERROR_EXTERNAL; > +} > + > +static av_cold int nvenc_encode_close(AVCodecContext *avctx) > +{ > + NvencContext *ctx = avctx->priv_data; > + int i; > + > + if (ctx->profile) > + av_freep(&ctx->profile); Freeing NULL is valid, so you do not need to check beforehand. And in this case, since ctx->profile is an option, it is automatically freed anyway. > + > + if (avctx->extradata) > + av_freep(&avctx->extradata); extradata is automatically freed for encoders. 
> + > + while (ctx->timestampList) > + timestamp_list_get_lowest(&ctx->timestampList); > + > + while (ctx->outputSurfaceReadyQueue) > + out_surf_queue_pop(&ctx->outputSurfaceReadyQueue); > + > + while (ctx->outputSurfaceQueue) > + out_surf_queue_pop(&ctx->outputSurfaceQueue); > + > + for (i = 0; i < ctx->maxSurfaceCount; ++i) { > + ff_pNvEnc->nvEncDestroyInputBuffer(ctx->nvencoder, > ctx->inputSurfaces[i].inputSurface); > + ff_pNvEnc->nvEncDestroyBitstreamBuffer(ctx->nvencoder, > ctx->outputSurfaces[i].outputSurface); > + } > + ctx->maxSurfaceCount = 0; > + > + ff_pNvEnc->nvEncDestroyEncoder(ctx->nvencoder); > + ctx->nvencoder = 0; > + > + ff_cuCtxDestroy(ctx->cuContext); > + ctx->cuContext = 0; > + > + ff_nvenc_unload_nvenc(avctx); > + > + av_frame_free(&avctx->coded_frame); > + > + return 0; > +} > + > +static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, > AVFrame *coded_frame, NvencOutputSurface *tmpoutsurf) > +{ > + NvencContext *ctx = avctx->priv_data; > + uint32_t *sliceOffsets = > (uint32_t*)calloc(ctx->encodeConfig.encodeCodecConfig.h264Config.sliceModeData, > sizeof(uint32_t)); > + NV_ENC_LOCK_BITSTREAM lockParams = { 0 }; > + NVENCSTATUS nvStatus; > + > + lockParams.version = NV_ENC_LOCK_BITSTREAM_VER; > + > + lockParams.doNotWait = 0; > + lockParams.outputBitstream = tmpoutsurf->outputSurface; > + lockParams.sliceOffsets = sliceOffsets; > + > + nvStatus = ff_pNvEnc->nvEncLockBitstream(ctx->nvencoder, &lockParams); > + if (nvStatus != NV_ENC_SUCCESS) { > + av_log(avctx, AV_LOG_ERROR, "Failed locking bitstream buffer\n"); > + timestamp_list_get_lowest(&ctx->timestampList); > + return 0; Looks like it should return an error. > + } > + > + if (ff_alloc_packet2(avctx, pkt, lockParams.bitstreamSizeInBytes) < 0) { > + ff_pNvEnc->nvEncUnlockBitstream(ctx->nvencoder, > tmpoutsurf->outputSurface); > + timestamp_list_get_lowest(&ctx->timestampList); > + return 0; > + } Same as above, and ff_alloc_packet2() already returns a proper error code. 
> + > + memcpy(pkt->data, lockParams.bitstreamBufferPtr, > lockParams.bitstreamSizeInBytes); > + > + nvStatus = ff_pNvEnc->nvEncUnlockBitstream(ctx->nvencoder, > tmpoutsurf->outputSurface); > + if (nvStatus != NV_ENC_SUCCESS) > + av_log(avctx, AV_LOG_ERROR, "Failed unlocking bitstream buffer, > expect the gates of mordor to open\n"); > + > + switch (lockParams.pictureType) { > + case NV_ENC_PIC_TYPE_IDR: > + pkt->flags |= AV_PKT_FLAG_KEY; > + case NV_ENC_PIC_TYPE_I: > + avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I; > + break; > + > + case NV_ENC_PIC_TYPE_P: > + avctx->coded_frame->pict_type = AV_PICTURE_TYPE_P; > + break; > + > + case NV_ENC_PIC_TYPE_B: > + avctx->coded_frame->pict_type = AV_PICTURE_TYPE_B; > + break; > + > + case NV_ENC_PIC_TYPE_BI: > + avctx->coded_frame->pict_type = AV_PICTURE_TYPE_BI; > + break; > + > + default: > + avctx->coded_frame->pict_type = AV_PICTURE_TYPE_NONE; Does this happen normally? > + break; > + } > + > + pkt->pts = lockParams.outputTimeStamp; > + pkt->dts = timestamp_list_get_lowest(&ctx->timestampList) - > ctx->encodeConfig.frameIntervalP; > + > + if (pkt->dts > pkt->pts) > + pkt->dts = pkt->pts; > + > + if (ctx->lastDts != AV_NOPTS_VALUE && pkt->dts <= ctx->lastDts) > + pkt->dts = ctx->lastDts + 1; > + > + ctx->lastDts = pkt->dts; > + > + return 1; > +} > + > +static int nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt, > + const AVFrame *frame, int *got_packet) > +{ > + NVENCSTATUS nvStatus; > + NvencContext *ctx = avctx->priv_data; > + NvencOutputSurface *tmpoutsurf; > + int i = 0; > + > + NV_ENC_PIC_PARAMS picParams = { 0 }; > + picParams.version = NV_ENC_PIC_PARAMS_VER; > + > + if (frame) { > + NV_ENC_LOCK_INPUT_BUFFER lockBufferParams = { 0 }; > + NvencInputSurface *inSurf = 0; > + > + for (i = 0; i < ctx->maxSurfaceCount; ++i) > + if (!ctx->inputSurfaces[i].lockCount) > + inSurf = &ctx->inputSurfaces[i]; Maybe a break here. 
> + av_assert0(inSurf); Are you positively sure that an input surface will always be available? > + > + inSurf->lockCount = 1; > + > + lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER; > + lockBufferParams.inputBuffer = inSurf->inputSurface; > + > + nvStatus = ff_pNvEnc->nvEncLockInputBuffer(ctx->nvencoder, > &lockBufferParams); > + if (nvStatus != NV_ENC_SUCCESS) { > + av_log(avctx, AV_LOG_ERROR, "Failed locking nvenc input > buffer\n"); > + return 0; > + } > + > + if (avctx->pix_fmt == AV_PIX_FMT_YUV420P) { > + uint8_t *buf = lockBufferParams.bufferDataPtr; > + > + av_image_copy_plane(buf, lockBufferParams.pitch, > + frame->data[0], frame->linesize[0], > + avctx->width, avctx->height); > + > + buf += inSurf->height * lockBufferParams.pitch; Could be factored out, unless I am missing something. > + > + av_image_copy_plane(buf, lockBufferParams.pitch >> 1, > + frame->data[2], frame->linesize[2], > + avctx->width >> 1, avctx->height >> 1); > + > + buf += (inSurf->height * lockBufferParams.pitch) >> 2; > + > + av_image_copy_plane(buf, lockBufferParams.pitch >> 1, > + frame->data[1], frame->linesize[1], > + avctx->width >> 1, avctx->height >> 1); > + } else if (avctx->pix_fmt == AV_PIX_FMT_NV12) { > + uint8_t *buf = lockBufferParams.bufferDataPtr; > + > + av_image_copy_plane(buf, lockBufferParams.pitch, > + frame->data[0], frame->linesize[0], > + avctx->width, avctx->height); > + > + buf += inSurf->height * lockBufferParams.pitch; > + > + av_image_copy_plane(buf, lockBufferParams.pitch, > + frame->data[1], frame->linesize[1], > + avctx->width, avctx->height >> 1); > + } else if (avctx->pix_fmt == AV_PIX_FMT_YUV444P) { > + uint8_t *buf = lockBufferParams.bufferDataPtr; > + > + av_image_copy_plane(buf, lockBufferParams.pitch, > + frame->data[0], frame->linesize[0], > + avctx->width, avctx->height); > + > + buf += inSurf->height * lockBufferParams.pitch; > + > + av_image_copy_plane(buf, lockBufferParams.pitch, > + frame->data[1], frame->linesize[1], > + 
avctx->width, avctx->height); > + > + buf += inSurf->height * lockBufferParams.pitch; > + > + av_image_copy_plane(buf, lockBufferParams.pitch, > + frame->data[2], frame->linesize[2], > + avctx->width, avctx->height); > + } else { > + av_log(avctx, AV_LOG_FATAL, "Invalid pixel format!\n"); > + return AVERROR(EINVAL); > + } > + > + nvStatus = ff_pNvEnc->nvEncUnlockInputBuffer(ctx->nvencoder, > inSurf->inputSurface); > + if (nvStatus != NV_ENC_SUCCESS) { > + av_log(avctx, AV_LOG_FATAL, "Failed unlocking input buffer!\n"); > + return AVERROR_EXTERNAL; > + } > + > + for (i = 0; i < ctx->maxSurfaceCount; ++i) > + if (!ctx->outputSurfaces[i].busy) > + break; > + > + if (i == ctx->maxSurfaceCount) { > + inSurf->lockCount = 0; > + av_log(avctx, AV_LOG_ERROR, "No free output surface found!\n"); > + return 0; Proper error code? > + } > + > + ctx->outputSurfaces[i].inputSurface = inSurf; > + > + picParams.inputBuffer = inSurf->inputSurface; > + picParams.bufferFmt = inSurf->format; > + picParams.inputWidth = avctx->width; > + picParams.inputHeight = avctx->height; > + picParams.outputBitstream = ctx->outputSurfaces[i].outputSurface; > + picParams.completionEvent = 0; > + > + if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) { > + if (frame->top_field_first) { > + picParams.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM; > + } else { > + picParams.pictureStruct = NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP; > + } > + } else { > + picParams.pictureStruct = NV_ENC_PIC_STRUCT_FRAME; > + } > + > + picParams.encodePicFlags = 0; > + picParams.inputTimeStamp = frame->pts; > + picParams.inputDuration = 0; > + picParams.codecPicParams.h264PicParams.sliceMode = > ctx->encodeConfig.encodeCodecConfig.h264Config.sliceMode; > + picParams.codecPicParams.h264PicParams.sliceModeData = > ctx->encodeConfig.encodeCodecConfig.h264Config.sliceModeData; > + memcpy(&picParams.rcParams, &ctx->encodeConfig.rcParams, > sizeof(NV_ENC_RC_PARAMS)); > + > + timestamp_list_insert_sorted(&ctx->timestampList, 
frame->pts); > + } else { > + picParams.encodePicFlags = NV_ENC_PIC_FLAG_EOS; > + } > + > + nvStatus = ff_pNvEnc->nvEncEncodePicture(ctx->nvencoder, &picParams); > + > + if (frame && nvStatus == NV_ENC_ERR_NEED_MORE_INPUT) { > + out_surf_queue_push(&ctx->outputSurfaceQueue, > &ctx->outputSurfaces[i]); > + ctx->outputSurfaces[i].busy = 1; > + } > + > + if (nvStatus != NV_ENC_SUCCESS && nvStatus != > NV_ENC_ERR_NEED_MORE_INPUT) { > + av_log(avctx, AV_LOG_ERROR, "EncodePicture failed!\n"); > + return AVERROR_EXTERNAL; > + } > + > + if (nvStatus != NV_ENC_ERR_NEED_MORE_INPUT) { > + while (ctx->outputSurfaceQueue) { > + tmpoutsurf = out_surf_queue_pop(&ctx->outputSurfaceQueue); > + out_surf_queue_push(&ctx->outputSurfaceReadyQueue, tmpoutsurf); > + } > + > + if (frame) { > + out_surf_queue_push(&ctx->outputSurfaceReadyQueue, > &ctx->outputSurfaces[i]); > + ctx->outputSurfaces[i].busy = 1; > + } > + } > + > + if (ctx->outputSurfaceReadyQueue) { > + tmpoutsurf = out_surf_queue_pop(&ctx->outputSurfaceReadyQueue); > + > + *got_packet = process_output_surface(avctx, pkt, avctx->coded_frame, > tmpoutsurf); > + > + tmpoutsurf->busy = 0; > + av_assert0(tmpoutsurf->inputSurface->lockCount); > + tmpoutsurf->inputSurface->lockCount--; > + } > + > + return 0; > +} > + > +static int pix_fmts_nvenc_initialized; > + > +static enum AVPixelFormat pix_fmts_nvenc[] = { > + AV_PIX_FMT_NV12, > + AV_PIX_FMT_NONE, > + AV_PIX_FMT_NONE, > + AV_PIX_FMT_NONE > +}; > + > +static av_cold void nvenc_init_static(AVCodec *codec) > +{ > + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS stEncodeSessionParams = { 0 }; > + CUcontext cuctxcur = 0, cuctx = 0; > + NVENCSTATUS nvStatus; > + void *nvencoder = 0; > + GUID encodeGuid = NV_ENC_CODEC_H264_GUID; > + GUID license = dummy_license; > + int i = 0, pos = 0; > + int gotnv12 = 0, got420 = 0, got444 = 0; > + uint32_t inputFmtCount = 32; > + NV_ENC_BUFFER_FORMAT inputFmts[32]; > + > + for (i = 0; i < 32; ++i) > + inputFmts[i] = (NV_ENC_BUFFER_FORMAT)0; > + i = 0; > + 
> + if (pix_fmts_nvenc_initialized) { > + codec->pix_fmts = pix_fmts_nvenc; > + return; > + } > + > + if (!ff_nvenc_dyload_nvenc(0)) { > + pix_fmts_nvenc_initialized = 1; > + return; > + } > + > + stEncodeSessionParams.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER; > + stEncodeSessionParams.apiVersion = NVENCAPI_VERSION; > + stEncodeSessionParams.clientKeyPtr = &license; > + > + cuctx = 0; > + if (ff_cuCtxCreate(&cuctx, 0, ff_pNvencDevices[ff_iNvencUseDeviceID]) != > CUDA_SUCCESS) { It would probably be better to have ff_cuCtxCreate() return an AVERROR code instead of a CUDA error code. The same applies to all ff_ helper functions. > + cuctx = 0; > + goto error; > + } > + > + if (ff_cuCtxPopCurrent(&cuctxcur) != CUDA_SUCCESS) > + goto error; > + > + stEncodeSessionParams.device = (void*)cuctx; > + stEncodeSessionParams.deviceType = NV_ENC_DEVICE_TYPE_CUDA; > + > + nvStatus = ff_pNvEnc->nvEncOpenEncodeSessionEx(&stEncodeSessionParams, > &nvencoder); > + if (nvStatus != NV_ENC_SUCCESS) { > + nvencoder = 0; > + goto error; > + } > + > + nvStatus = ff_pNvEnc->nvEncGetInputFormats(nvencoder, encodeGuid, > inputFmts, 32, &inputFmtCount); > + if (nvStatus != NV_ENC_SUCCESS) > + goto error; > + > + pos = 0; > + for (i = 0; i < inputFmtCount && pos < 3; ++i) { > + if (!gotnv12 && (inputFmts[i] == NV_ENC_BUFFER_FORMAT_NV12_PL > + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_NV12_TILED16x16 > + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_NV12_TILED64x16)) { > + > + pix_fmts_nvenc[pos++] = AV_PIX_FMT_NV12; > + gotnv12 = 1; > + } else if (!got420 && (inputFmts[i] == NV_ENC_BUFFER_FORMAT_YV12_PL > + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YV12_TILED16x16 > + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YV12_TILED64x16)) { > + > + pix_fmts_nvenc[pos++] = AV_PIX_FMT_YUV420P; > + got420 = 1; > + } else if (!got444 && (inputFmts[i] == NV_ENC_BUFFER_FORMAT_YUV444_PL > + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YUV444_TILED16x16 > + || inputFmts[i] == NV_ENC_BUFFER_FORMAT_YUV444_TILED64x16)) { > + > + 
pix_fmts_nvenc[pos++] = AV_PIX_FMT_YUV444P; > + got444 = 1; > + } > + } > + > + pix_fmts_nvenc[pos] = AV_PIX_FMT_NONE; > + > + pix_fmts_nvenc_initialized = 1; > + codec->pix_fmts = pix_fmts_nvenc; > + > + ff_pNvEnc->nvEncDestroyEncoder(nvencoder); > + ff_cuCtxDestroy(cuctx); > + > + ff_nvenc_unload_nvenc(0); > + > + return; > + > +error: > + > + if (nvencoder) > + ff_pNvEnc->nvEncDestroyEncoder(nvencoder); > + > + if (cuctx) > + ff_cuCtxDestroy(cuctx); > + > + pix_fmts_nvenc_initialized = 1; > + pix_fmts_nvenc[0] = AV_PIX_FMT_NV12; > + pix_fmts_nvenc[1] = AV_PIX_FMT_NONE; > + > + codec->pix_fmts = pix_fmts_nvenc; > + > + ff_nvenc_unload_nvenc(0); > +} > + > +#define OFFSET(x) offsetof(NvencContext, x) > +#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM > +static const AVOption options[] = { > + { "profile", "Set profile restrictions", OFFSET(profile), > AV_OPT_TYPE_STRING, { .str = "high" }, 0, 0, VE}, > + { "preset", "Set the encoding preset (one of hq, hp, bd, ll, llhq, llhp, > default)", OFFSET(preset), AV_OPT_TYPE_STRING, { .str = "hq" }, 0, 0, VE }, > + { "cqp", "Constant quantization parameter rate control method", > OFFSET(cqp), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE }, > + { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_INT, { .i64 = > 0 }, 0, 1, VE }, > + { "2pass", "Use 2pass cbr encoding mode (low latency mode only)", > OFFSET(twopass), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 1, VE }, Some of these options are redundant with global ones; "profile" already cited, "2pass" = -flags +pass1/+pass2; "cqp" = "global_quality". 
> + { "goppattern", "Specifies the GOP pattern as follows: 0: I, 1: IPP, 2: > IBP, 3: IBBP", OFFSET(gobpattern), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 3, VE > }, > + { NULL } > +}; > + > +static const AVClass nvenc_class = { > + .class_name = "nvenc", > + .item_name = av_default_item_name, > + .option = options, > + .version = LIBAVUTIL_VERSION_INT, > +}; > + > +static const AVCodecDefault nvenc_defaults[] = { > + { "b", "0" }, > + { "qmin", "-1" }, > + { "qmax", "-1" }, > + { "qdiff", "-1" }, > + { "qblur", "-1" }, > + { "qcomp", "-1" }, > + { NULL }, > +}; > + > +AVCodec ff_nvenc_encoder = { > + .name = "nvenc", > + .long_name = NULL_IF_CONFIG_SMALL("Nvidia NVENC h264 encoder"), > + .type = AVMEDIA_TYPE_VIDEO, > + .id = AV_CODEC_ID_H264, > + .priv_data_size = sizeof(NvencContext), > + .init = nvenc_encode_init, > + .encode2 = nvenc_encode_frame, > + .close = nvenc_encode_close, > + .capabilities = CODEC_CAP_DELAY, > + .priv_class = &nvenc_class, > + .defaults = nvenc_defaults, > + .init_static_data = nvenc_init_static > +}; > diff --git a/libavcodec/nvenc_api.c b/libavcodec/nvenc_api.c > new file mode 100644 > index 0000000..53d5fa8 > --- /dev/null > +++ b/libavcodec/nvenc_api.c > @@ -0,0 +1,275 @@ > +/* > + * H.264 hardware encoding using nvidia nvenc > + * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#ifdef _WIN32 > +#include <windows.h> > +#else > +#include <dlfcn.h> > +#endif > + > +#include "libavutil/avassert.h" > +#include "avcodec.h" > +#include "internal.h" > + > +#include "nvenc_cuda.h" > +#include "nvenc_api.h" > + > +PCUINIT ff_cuInit = 0; > +PCUDEVICEGETCOUNT ff_cuDeviceGetCount = 0; > +PCUDEVICEGET ff_cuDeviceGet = 0; > +PCUDEVICEGETNAME ff_cuDeviceGetName = 0; > +PCUDEVICECOMPUTECAPABILITY ff_cuDeviceComputeCapability = 0; > +PCUCTXCREATE ff_cuCtxCreate = 0; > +PCUCTXPOPCURRENT ff_cuCtxPopCurrent = 0; > +PCUCTXDESTROY ff_cuCtxDestroy = 0; > + > +static int nvenc_init_count; > +static NV_ENCODE_API_FUNCTION_LIST nvEncFuncs; > +NV_ENCODE_API_FUNCTION_LIST *ff_pNvEnc = 0; > +int ff_iNvencDeviceCount = 0; > +CUdevice ff_pNvencDevices[16]; > +unsigned int ff_iNvencUseDeviceID = 0; > + > +#ifdef _WIN32 > +#define LOAD_FUNC(l, s) GetProcAddress(l, s) > +#define DL_CLOSE_FUNC(l) FreeLibrary(l) > +static HMODULE cudaLib; > +static HMODULE nvEncLib; > +#else > +#define LOAD_FUNC(l, s) dlsym(l, s) > +#define DL_CLOSE_FUNC(l) dlclose(l) > +static void *cudaLib; > +static void *nvEncLib; > +#endif > + > +#define ifav_log(...) if (avctx) { av_log(__VA_ARGS__); } Looks strange: why no error message when there is no context? > + > +#define CHECK_LOAD_FUNC(t, f, s) \ > +{ \ > + f = (t)LOAD_FUNC(cudaLib, s); \ > + if (!f) { \ > + ifav_log(avctx, AV_LOG_FATAL, "Failed loading %s from CUDA > library\n", s); \ > + goto error; \ > + } \ > +} Some compilers choke on that because of the semicolon after the block; for that reason, it is recommended to use do { ... } while (0). > + > +static int nvenc_dyload_cuda(AVCodecContext *avctx) > +{ > + if (cudaLib) > + return 1; Thread safe? 
> + > +#if defined(_WIN32) > + cudaLib = LoadLibrary(TEXT("nvcuda.dll")); > +#elif defined(__CYGWIN__) > + cudaLib = dlopen("nvcuda.dll", RTLD_LAZY); > +#else > + cudaLib = dlopen("libcuda.so", RTLD_LAZY); > +#endif > + > + if (!cudaLib) { > + ifav_log(avctx, AV_LOG_FATAL, "Failed loading CUDA library\n"); > + goto error; > + } > + > + CHECK_LOAD_FUNC(PCUINIT, ff_cuInit, "cuInit"); > + CHECK_LOAD_FUNC(PCUDEVICEGETCOUNT, ff_cuDeviceGetCount, > "cuDeviceGetCount"); > + CHECK_LOAD_FUNC(PCUDEVICEGET, ff_cuDeviceGet, "cuDeviceGet"); > + CHECK_LOAD_FUNC(PCUDEVICEGETNAME, ff_cuDeviceGetName, "cuDeviceGetName"); > + CHECK_LOAD_FUNC(PCUDEVICECOMPUTECAPABILITY, > ff_cuDeviceComputeCapability, "cuDeviceComputeCapability"); > + CHECK_LOAD_FUNC(PCUCTXCREATE, ff_cuCtxCreate, "cuCtxCreate_v2"); > + CHECK_LOAD_FUNC(PCUCTXPOPCURRENT, ff_cuCtxPopCurrent, > "cuCtxPopCurrent_v2"); > + CHECK_LOAD_FUNC(PCUCTXDESTROY, ff_cuCtxDestroy, "cuCtxDestroy_v2"); You could almost use #name and ff_##name to avoid duplicating the parameter. > + > + return 1; > + > +error: > + > + if (cudaLib) > + DL_CLOSE_FUNC(cudaLib); > + > + cudaLib = 0; > + > + return 0; > +} > + > +static int checkCudaErrors(AVCodecContext *avctx, CUresult err, const char > *func) > +{ > + if (err != CUDA_SUCCESS) { > + ifav_log(avctx, AV_LOG_FATAL, ">> %s - failed with error code > 0x%x\n", func, err); The library does not provide error code -> string utility? 
> + return 0; > + } > + return 1; > +} > +#define checkCudaErrors(f) if (!checkCudaErrors(avctx, f, #f)) goto error > + > +static int nvenc_check_cuda(AVCodecContext *avctx) > +{ > + int deviceCount = 0; > + CUdevice cuDevice = 0; > + char gpu_name[128]; > + int SMminor = 0, SMmajor = 0; > + int i, smver; > + > + if (!nvenc_dyload_cuda(avctx)) > + return 0; > + > + if (ff_iNvencDeviceCount > 0) > + return 1; > + > + checkCudaErrors(ff_cuInit(0)); > + > + checkCudaErrors(ff_cuDeviceGetCount(&deviceCount)); > + > + if (!deviceCount) { > + ifav_log(avctx, AV_LOG_FATAL, "No CUDA capable devices found\n"); > + goto error; > + } > + > + ifav_log(avctx, AV_LOG_VERBOSE, "%d CUDA capable devices found\n", > deviceCount); > + > + ff_iNvencDeviceCount = 0; > + > + for (i = 0; i < deviceCount; ++i) { > + checkCudaErrors(ff_cuDeviceGet(&cuDevice, i)); > + checkCudaErrors(ff_cuDeviceGetName(gpu_name, 128, cuDevice)); sizeof(gpu_name), to avoid desync errors. > + checkCudaErrors(ff_cuDeviceComputeCapability(&SMmajor, &SMminor, > cuDevice)); > + > + smver = (SMmajor << 4) | SMminor; > + > + ifav_log(avctx, AV_LOG_VERBOSE, "[ GPU #%d - < %s > has Compute SM > %d.%d, NVENC %s ]\n", i, gpu_name, SMmajor, SMminor, (smver >= 0x30) ? 
> "Available" : "Not Available"); > + > + if (smver >= 0x30) > + ff_pNvencDevices[ff_iNvencDeviceCount++] = cuDevice; > + } > + > + if (!ff_iNvencDeviceCount) { > + ifav_log(avctx, AV_LOG_FATAL, "No NVENC capable devices found\n"); > + goto error; > + } > + > + return 1; > + > +error: > + > + ff_iNvencDeviceCount = 0; > + > + return 0; > +} > + > +av_cold int ff_nvenc_dyload_nvenc(AVCodecContext *avctx) > +{ > + PNVENCODEAPICREATEINSTANCE nvEncodeAPICreateInstance = 0; > + NVENCSTATUS nvstatus; > + > + if (!nvenc_check_cuda(avctx)) > + return 0; > + > + if (ff_pNvEnc) { > + nvenc_init_count++; > + return 1; > + } > + > +#if defined(_WIN32) > + if (sizeof(void*) == 8) { > + nvEncLib = LoadLibrary(TEXT("nvEncodeAPI64.dll")); > + } else { > + nvEncLib = LoadLibrary(TEXT("nvEncodeAPI.dll")); > + } > +#elif defined(__CYGWIN__) > + if (sizeof(void*) == 8) { > + nvEncLib = dlopen("nvEncodeAPI64.dll", RTLD_LAZY); > + } else { > + nvEncLib = dlopen("nvEncodeAPI.dll", RTLD_LAZY); > + } > +#else > + nvEncLib = dlopen("libnvidia-encode.so", RTLD_LAZY); > +#endif > + > + if (!nvEncLib) { > + ifav_log(avctx, AV_LOG_FATAL, "Failed loading the nvenc library\n"); > + goto error; > + } > + > + nvEncodeAPICreateInstance = > (PNVENCODEAPICREATEINSTANCE)LOAD_FUNC(nvEncLib, "NvEncodeAPICreateInstance"); > + > + if (!nvEncodeAPICreateInstance) { > + ifav_log(avctx, AV_LOG_FATAL, "Failed to load nvenc entrypoint\n"); > + goto error; > + } > + > + ff_pNvEnc = &nvEncFuncs; > + memset(ff_pNvEnc, 0, sizeof(NV_ENCODE_API_FUNCTION_LIST)); > + ff_pNvEnc->version = NV_ENCODE_API_FUNCTION_LIST_VER; > + > + nvstatus = nvEncodeAPICreateInstance(ff_pNvEnc); > + > + if (nvstatus != NV_ENC_SUCCESS) { > + ifav_log(avctx, AV_LOG_FATAL, "Failed to create nvenc instance\n"); > + goto error; > + } > + > + ifav_log(avctx, AV_LOG_VERBOSE, "Nvenc initialized successfully\n"); > + > + nvenc_init_count = 1; > + > + return 1; > + > +error: > + if (nvEncLib) > + DL_CLOSE_FUNC(nvEncLib); > + > + nvEncLib = 0; > + 
ff_pNvEnc = 0; > + nvenc_init_count = 0; > + > + return 0; > +} > + > +av_cold void ff_nvenc_unload_nvenc(AVCodecContext *avctx) > +{ > + if (nvenc_init_count <= 0) > + return; > + > + nvenc_init_count--; This does not look thread-safe. > + > + if (nvenc_init_count > 0) > + return; > + > + DL_CLOSE_FUNC(nvEncLib); > + nvEncLib = 0; > + ff_pNvEnc = 0; > + > + ff_iNvencDeviceCount = 0; > + > + DL_CLOSE_FUNC(cudaLib); > + cudaLib = 0; > + > + ff_cuInit = 0; > + ff_cuDeviceGetCount = 0; > + ff_cuDeviceGet = 0; > + ff_cuDeviceGetName = 0; > + ff_cuDeviceComputeCapability = 0; > + ff_cuCtxCreate = 0; > + ff_cuCtxPopCurrent = 0; > + ff_cuCtxDestroy = 0; > + > + ifav_log(avctx, AV_LOG_VERBOSE, "Nvenc unloaded\n"); > +} > diff --git a/libavcodec/nvenc_api.h b/libavcodec/nvenc_api.h > new file mode 100644 > index 0000000..16b1c72 > --- /dev/null > +++ b/libavcodec/nvenc_api.h > @@ -0,0 +1,35 @@ > +/* > + * H.264 hardware encoding using nvidia nvenc > + * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#ifndef AVCODEC_NVENC_API_H > +#define AVCODEC_NVENC_API_H > + > +#include <nvEncodeAPI.h> > + > + > +typedef NVENCSTATUS (NVENCAPI* > PNVENCODEAPICREATEINSTANCE)(NV_ENCODE_API_FUNCTION_LIST *functionList); > + > +extern NV_ENCODE_API_FUNCTION_LIST *ff_pNvEnc; > + > +int ff_nvenc_dyload_nvenc(AVCodecContext *avctx); > +void ff_nvenc_unload_nvenc(AVCodecContext *avctx); > + > +#endif > diff --git a/libavcodec/nvenc_cuda.h b/libavcodec/nvenc_cuda.h > new file mode 100644 > index 0000000..ae43a22 > --- /dev/null > +++ b/libavcodec/nvenc_cuda.h > @@ -0,0 +1,62 @@ > +/* > + * H.264 hardware encoding using nvidia nvenc > + * Copyright (c) 2014 Timo Rothenpieler <t...@rothenpieler.org> > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#ifndef AVCODEC_NVENC_CUDA_H > +#define AVCODEC_NVENC_CUDA_H > + > +typedef enum cudaError_enum { > + CUDA_SUCCESS = 0 > +} CUresult; > +typedef int CUdevice; > +typedef void* CUcontext; > + > +#ifdef _WIN32 > +#define CUDAAPI __stdcall > +#else > +#define CUDAAPI > +#endif > + > +typedef CUresult(CUDAAPI *PCUINIT)(unsigned int Flags); > +typedef CUresult(CUDAAPI *PCUDEVICEGETCOUNT)(int *count); > +typedef CUresult(CUDAAPI *PCUDEVICEGET)(CUdevice *device, int ordinal); > +typedef CUresult(CUDAAPI *PCUDEVICEGETNAME)(char *name, int len, CUdevice > dev); > +typedef CUresult(CUDAAPI *PCUDEVICECOMPUTECAPABILITY)(int *major, int > *minor, CUdevice dev); > +typedef CUresult(CUDAAPI *PCUCTXCREATE)(CUcontext *pctx, unsigned int flags, > CUdevice dev); > +typedef CUresult(CUDAAPI *PCUCTXPOPCURRENT)(CUcontext *pctx); > +typedef CUresult(CUDAAPI *PCUCTXDESTROY)(CUcontext ctx); > + > +extern PCUINIT ff_cuInit; > +extern PCUDEVICEGETCOUNT ff_cuDeviceGetCount; > +extern PCUDEVICEGET ff_cuDeviceGet; > +extern PCUDEVICEGETNAME ff_cuDeviceGetName; > +extern PCUDEVICECOMPUTECAPABILITY ff_cuDeviceComputeCapability; > +extern PCUCTXCREATE ff_cuCtxCreate; > +extern PCUCTXPOPCURRENT ff_cuCtxPopCurrent; > +extern PCUCTXDESTROY ff_cuCtxDestroy; > + > +int ff_nvenc_dyload_cuda(AVCodecContext *avctx); > +int ff_nvenc_check_cuda(AVCodecContext *avctx); > + > +extern int ff_iNvencDeviceCount; > +extern CUdevice ff_pNvencDevices[16]; > +extern unsigned int ff_iNvencUseDeviceID; > + > +#endif Regards, -- Nicolas George
signature.asc
Description: Digital signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel