From 84949792cbc547ac62ece52747b25ab3a7f9aca3 Mon Sep 17 00:00:00 2001
From: Oliver Collyer <ovcollyer@mac.com>
Date: Wed, 24 Aug 2016 10:46:55 +0100
Subject: [PATCH] NVENC - added support for 10 bit HEVC encoding

---
 configure               |   4 +-
 libavcodec/nvenc.c      | 133 ++++++++++++++++++++++++++++++++++++++++++++++--
 libavcodec/nvenc.h      |   5 ++
 libavcodec/nvenc_hevc.c |   5 +-
 libavcodec/version.h    |   2 +-
 5 files changed, 140 insertions(+), 9 deletions(-)

diff --git a/configure b/configure
index 5b017fd..8342ba8 100755
--- a/configure
+++ b/configure
@@ -5777,8 +5777,8 @@ enabled mmal && check_func_headers interface/mmal/mmal.h "MMAL_PARAMETER_VIDEO_M
 
 enabled netcdf            && require_pkg_config netcdf netcdf.h nc_inq_libvers
 enabled nvenc             && { check_header nvEncodeAPI.h || die "ERROR: nvEncodeAPI.h not found."; } &&
-                             { check_cpp_condition nvEncodeAPI.h "NVENCAPI_MAJOR_VERSION >= 6" ||
-                               die "ERROR: NVENC API version 5 or older is not supported"; } &&
+                             { check_cpp_condition nvEncodeAPI.h "NVENCAPI_MAJOR_VERSION >= 7" ||
+                               die "ERROR: NVENC API version 6 or older is not supported"; } &&
                              { [ $target_os != cygwin ] || die "ERROR: NVENC is not supported on Cygwin currently."; }
 enabled openal            && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32"; do
                                check_lib 'AL/al.h' alGetError "${al_libs}" && break; done } ||
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index 984dd3b..d12d91b 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -75,14 +75,24 @@
 
 const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
     AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_YUV420P10, /* chroma planes are interleaved on upload */
     AV_PIX_FMT_NV12,
+    AV_PIX_FMT_P010,      /* planes are copied as-is on upload */
     AV_PIX_FMT_YUV444P,
+    AV_PIX_FMT_YUV444P16, /* submitted as a 10-bit 4:4:4 surface */
 #if CONFIG_CUDA
     AV_PIX_FMT_CUDA,
 #endif
     AV_PIX_FMT_NONE
 };
 
+/* Pixel format classifiers; YUV444P16 input is encoded as 10-bit 4:4:4. */
+#define IS_10BIT(pix_fmt)  ((pix_fmt) == AV_PIX_FMT_YUV420P10 || \
+                            (pix_fmt) == AV_PIX_FMT_P010      || \
+                            (pix_fmt) == AV_PIX_FMT_YUV444P16)
+#define IS_YUV444(pix_fmt) ((pix_fmt) == AV_PIX_FMT_YUV444P   || \
+                            (pix_fmt) == AV_PIX_FMT_YUV444P16)
+
 static const struct {
     NVENCSTATUS nverr;
     int         averr;
@@ -273,7 +283,7 @@ static int nvenc_check_capabilities(AVCodecContext *avctx)
     }
 
     ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_YUV444_ENCODE);
-    if (ctx->data_pix_fmt == AV_PIX_FMT_YUV444P && ret <= 0) {
+    if (IS_YUV444(ctx->data_pix_fmt) && ret <= 0) {
         av_log(avctx, AV_LOG_VERBOSE, "YUV444P not supported\n");
         return AVERROR(ENOSYS);
     }
@@ -314,6 +324,12 @@ static int nvenc_check_capabilities(AVCodecContext *avctx)
         return AVERROR(ENOSYS);
     }
 
+    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_10BIT_ENCODE);
+    if (IS_10BIT(ctx->data_pix_fmt) && ret <= 0) {
+        av_log(avctx, AV_LOG_VERBOSE, "10 bit encode not supported\n");
+        return AVERROR(ENOSYS);
+    }
+
     return 0;
 }
 
@@ -800,9 +816,26 @@ static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
         hevc->outputPictureTimingSEI   = 1;
     }
 
-    /* No other profile is supported in the current SDK version 5 */
-    cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
-    avctx->profile = FF_PROFILE_HEVC_MAIN;
+    switch(ctx->profile) {
+    case NV_ENC_HEVC_PROFILE_MAIN:
+        cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
+        avctx->profile = FF_PROFILE_HEVC_MAIN;
+        break;
+    case NV_ENC_HEVC_PROFILE_MAIN_10:
+        cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
+        avctx->profile = FF_PROFILE_HEVC_MAIN_10;
+        break;
+    }
+
+    // force setting profile as main10 if input is 10 bit
+    if (IS_10BIT(ctx->data_pix_fmt)) {
+        cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
+        avctx->profile = FF_PROFILE_HEVC_MAIN_10;
+    }
+
+    hevc->chromaFormatIDC = IS_YUV444(ctx->data_pix_fmt) ? 3 : 1;
+
+    hevc->pixelBitDepthMinus8 = IS_10BIT(ctx->data_pix_fmt) ? 2 : 0;
 
     hevc->level = ctx->level;
 
@@ -954,14 +987,26 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
         ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YV12_PL;
         break;
 
+    case AV_PIX_FMT_YUV420P10:
+        ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
+        break;
+
     case AV_PIX_FMT_NV12:
         ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_NV12_PL;
         break;
 
+    case AV_PIX_FMT_P010:
+        ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
+        break;
+
     case AV_PIX_FMT_YUV444P:
         ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YUV444_PL;
         break;
 
+    case AV_PIX_FMT_YUV444P16:
+        ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
+        break;
+
     default:
         av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format\n");
         return AVERROR(EINVAL);
@@ -1206,6 +1251,49 @@ static NvencSurface *get_free_frame(NvencContext *ctx)
     return NULL;
 }
 
+/* Copy a plane of 16-bit samples, shifting each sample left by 6 bits.
+ * width is in samples, linesizes in bytes; a NULL dst or src is a no-op. */
+static void copy_single_10bit_plane(uint8_t *dst, int dst_linesize,
+                                    const uint8_t *src, int src_linesize,
+                                    int width, int height)
+{
+    int x;
+    if (!dst || !src)
+        return;
+    av_assert0(abs(src_linesize) >= width << 1);
+    av_assert0(abs(dst_linesize) >= width << 1);
+    for (; height > 0; height--, dst += dst_linesize, src += src_linesize) {
+        uint16_t       *out = (uint16_t *)dst;
+        const uint16_t *in  = (const uint16_t *)src;
+        for (x = 0; x < width; x++)
+            out[x] = in[x] << 6;
+    }
+}
+
+/* Interleave 16-bit samples of src1 and src2 into dst, each shifted left by 6. */
+static void interleave_10bit_planes(uint8_t *dst, int dst_linesize,
+                                    const uint8_t *src1, int src1_linesize,
+                                    const uint8_t *src2, int src2_linesize,
+                                    int width, int height)
+{
+    int w;
+    if (!dst || !src1 || !src2)
+        return;
+    av_assert0(abs(src1_linesize) >= width);
+    av_assert0(abs(src2_linesize) >= width);
+    av_assert0(abs(dst_linesize) >= width << 1);
+    for (; height > 0; height--, dst += dst_linesize,
+         src1 += src1_linesize, src2 += src2_linesize) {
+        uint16_t       *out = (uint16_t *)dst;
+        const uint16_t *in1 = (const uint16_t *)src1;
+        const uint16_t *in2 = (const uint16_t *)src2;
+        for (w = width; w > 0; w -= 2) {
+            *out++ = *in1++ << 6;
+            *out++ = *in2++ << 6;
+        }
+    }
+}
+
 static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *inSurf,
             NV_ENC_LOCK_INPUT_BUFFER *lockBufferParams, const AVFrame *frame)
 {
@@ -1228,6 +1316,17 @@ static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *inSurf,
         av_image_copy_plane(buf, lockBufferParams->pitch >> 1,
             frame->data[1], frame->linesize[1],
             avctx->width >> 1, avctx->height >> 1);
+    } else if (frame->format == AV_PIX_FMT_YUV420P10) {
+        copy_single_10bit_plane(buf, lockBufferParams->pitch,
+            frame->data[0], frame->linesize[0],
+            avctx->width, avctx->height);
+
+        buf += off;
+
+        interleave_10bit_planes(buf, lockBufferParams->pitch,
+            frame->data[1], frame->linesize[1],
+            frame->data[2], frame->linesize[2],
+            avctx->width, avctx->height >> 1);
     } else if (frame->format == AV_PIX_FMT_NV12) {
         av_image_copy_plane(buf, lockBufferParams->pitch,
             frame->data[0], frame->linesize[0],
@@ -1238,6 +1337,16 @@ static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *inSurf,
         av_image_copy_plane(buf, lockBufferParams->pitch,
             frame->data[1], frame->linesize[1],
             avctx->width, avctx->height >> 1);
+    } else if (frame->format == AV_PIX_FMT_P010) {
+        av_image_copy_plane(buf, lockBufferParams->pitch,
+            frame->data[0], frame->linesize[0],
+            avctx->width << 1, avctx->height);
+
+        buf += off;
+
+        av_image_copy_plane(buf, lockBufferParams->pitch,
+            frame->data[1], frame->linesize[1],
+            avctx->width << 1, avctx->height >> 1);
     } else if (frame->format == AV_PIX_FMT_YUV444P) {
         av_image_copy_plane(buf, lockBufferParams->pitch,
             frame->data[0], frame->linesize[0],
@@ -1254,6 +1363,22 @@ static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *inSurf,
         av_image_copy_plane(buf, lockBufferParams->pitch,
             frame->data[2], frame->linesize[2],
             avctx->width, avctx->height);
+    } else if (frame->format == AV_PIX_FMT_YUV444P16) {
+        av_image_copy_plane(buf, lockBufferParams->pitch,
+            frame->data[0], frame->linesize[0],
+            avctx->width << 1, avctx->height);
+
+        buf += off;
+
+        av_image_copy_plane(buf, lockBufferParams->pitch,
+            frame->data[1], frame->linesize[1],
+            avctx->width << 1, avctx->height);
+
+        buf += off;
+
+        av_image_copy_plane(buf, lockBufferParams->pitch,
+            frame->data[2], frame->linesize[2],
+            avctx->width << 1, avctx->height);
     } else {
         av_log(avctx, AV_LOG_FATAL, "Invalid pixel format!\n");
         return AVERROR(EINVAL);
diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h
index 961cbc7..3cf0309 100644
--- a/libavcodec/nvenc.h
+++ b/libavcodec/nvenc.h
@@ -117,6 +117,11 @@ enum {
 };
 
 enum {
+    NV_ENC_HEVC_PROFILE_MAIN,    /* selects NV_ENC_HEVC_PROFILE_MAIN_GUID */
+    NV_ENC_HEVC_PROFILE_MAIN_10, /* selects NV_ENC_HEVC_PROFILE_MAIN10_GUID */
+};
+
+enum {
     NVENC_LOWLATENCY = 1,
     NVENC_LOSSLESS   = 2,
     NVENC_ONE_PASS   = 4,
diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c
index 1ce7c89..d9a4bf6 100644
--- a/libavcodec/nvenc_hevc.c
+++ b/libavcodec/nvenc_hevc.c
@@ -39,8 +39,9 @@ static const AVOption options[] = {
     { "llhp",       "low latency hp",                     0,                   AV_OPT_TYPE_CONST,  { .i64 = PRESET_LOW_LATENCY_HP }, 0, 0, VE, "preset" },
     { "lossless",   "lossless",                           0,                   AV_OPT_TYPE_CONST,  { .i64 = PRESET_LOSSLESS_DEFAULT }, 0, 0, VE, "preset" },
     { "losslesshp", "lossless hp",                        0,                   AV_OPT_TYPE_CONST,  { .i64 = PRESET_LOSSLESS_HP }, 0, 0, VE, "preset" },
-    { "profile", "Set the encoding profile",             OFFSET(profile),      AV_OPT_TYPE_INT,    { .i64 = FF_PROFILE_HEVC_MAIN }, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN, VE, "profile" },
-    { "main",    "",                                     0,                    AV_OPT_TYPE_CONST,  { .i64 = FF_PROFILE_HEVC_MAIN }, 0, 0, VE, "profile" },
+    { "profile", "Set the encoding profile",             OFFSET(profile),      AV_OPT_TYPE_INT,    { .i64 = NV_ENC_HEVC_PROFILE_MAIN }, NV_ENC_HEVC_PROFILE_MAIN, NV_ENC_HEVC_PROFILE_MAIN_10, VE, "profile" },
+    { "main",    "",                                     0,                    AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_HEVC_PROFILE_MAIN }, 0, 0, VE, "profile" },
+    { "main10",  "",                                     0,                    AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_HEVC_PROFILE_MAIN_10 }, 0, 0, VE, "profile" },
     { "level",   "Set the encoding level restriction",   OFFSET(level),        AV_OPT_TYPE_INT,    { .i64 = NV_ENC_LEVEL_AUTOSELECT }, NV_ENC_LEVEL_AUTOSELECT, NV_ENC_LEVEL_HEVC_62, VE, "level" },
     { "auto",    "",                                     0,                    AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_LEVEL_AUTOSELECT },  0, 0, VE,  "level" },
     { "1",       "",                                     0,                    AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_LEVEL_HEVC_1 },  0, 0, VE,  "level" },
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 7ee5b5a..f7a7344 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,7 +29,7 @@
 
 #define LIBAVCODEC_VERSION_MAJOR  57
 #define LIBAVCODEC_VERSION_MINOR  54
-#define LIBAVCODEC_VERSION_MICRO 100
+#define LIBAVCODEC_VERSION_MICRO 101
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
                                                LIBAVCODEC_VERSION_MINOR, \
-- 
2.7.4

