Felix Paul Kühne pushed to branch master at VideoLAN / VLC


Commits:
770a6e1c by Thomas Guillem at 2026-02-28T11:50:17+01:00
nvdec: refactor GPU copy by using chroma description

- - - - -
910a544a by Thomas Guillem at 2026-02-28T11:50:17+01:00
nvdec: gl: use chroma desc to calculate WidthInBytes

- - - - -


2 changed files:

- modules/hw/nvdec/nvdec.c
- modules/hw/nvdec/nvdec_gl.c


Changes:

=====================================
modules/hw/nvdec/nvdec.c
=====================================
@@ -112,6 +112,7 @@ struct nvdec_ctx {
     size_t                      decoderHeight;
 
     unsigned int                outputPitch;
+    const vlc_chroma_description_t *chroma_desc;
     hw_pool_t                   *out_pool;
     hw_pool_owner_t             pool_owner;
 
@@ -350,20 +351,15 @@ static int CUDAAPI HandleVideoSequence(void *p_opaque, CUVIDEOFORMAT *p_format)
                             + tex_alignment - 1) / tex_alignment * tex_alignment;
 
         unsigned int ByteWidth = p_sys->outputPitch;
+        vlc_fourcc_t sw_chroma = NVDECToVlcChroma(p_dec->fmt_out.video.i_chroma);
+        p_sys->chroma_desc = vlc_fourcc_GetChromaDescription(sw_chroma);
+        const vlc_chroma_description_t *desc = p_sys->chroma_desc;
+        if (desc == NULL)
+            goto cuda_error;
         unsigned int Height = p_dec->fmt_out.video.i_height;
-        switch (dparams.OutputFormat)
-        {
-            case cudaVideoSurfaceFormat_YUV444:
-            case cudaVideoSurfaceFormat_YUV444_16Bit:
-                Height += 2 * Height; // 3 planes
-                break;
-            case cudaVideoSurfaceFormat_NV12:
-            case cudaVideoSurfaceFormat_P016:
-                Height += Height / 2; // U and V at quarter resolution
-                break;
-            default:
-                vlc_assert_unreachable();
-        }
+        for (unsigned i = 1; i < desc->plane_count; i++)
+            Height += p_dec->fmt_out.video.i_height * desc->p[i].h.num
+                    / desc->p[i].h.den;
 
         ret = CALL_CUDA_DEC(cuCtxPushCurrent, p_sys->devsys->cuCtx);
         if (ret != CUDA_SUCCESS)
@@ -404,6 +400,8 @@ static int CUDAAPI HandleVideoSequence(void *p_opaque, CUVIDEOFORMAT *p_format)
         if (p_sys->out_pool == NULL)
             goto cuda_error;
     }
+    else
+        p_sys->chroma_desc = NULL;
 
     p_sys->decoderHeight = p_format->coded_height;
 
@@ -478,53 +476,29 @@ static int CUDAAPI HandlePictureDisplay(void *p_opaque, CUVIDPARSERDISPINFO *p_d
 
         size_t srcY = 0;
         size_t dstY = 0;
-        if (p_pic->format.i_chroma == VLC_CODEC_NVDEC_OPAQUE_444 || p_pic->format.i_chroma == VLC_CODEC_NVDEC_OPAQUE_444_16B)
-        {
-            for (int i_plane = 0; i_plane < 3; i_plane++) {
-                CUDA_MEMCPY2D cu_cpy = {
-                    .srcMemoryType  = CU_MEMORYTYPE_DEVICE,
-                    .srcDevice      = frameDevicePtr,
-                    .srcY           = srcY,
-                    .srcPitch       = i_pitch,
-                    .dstMemoryType  = CU_MEMORYTYPE_DEVICE,
-                    .dstDevice      = picctx->devicePtr,
-                    .dstPitch       = picctx->bufferPitch,
-                    .dstY           = dstY,
-                    .WidthInBytes   = i_pitch,
-                    .Height         = __MIN(picctx->bufferHeight, p_dec->fmt_out.video.i_y_offset + p_dec->fmt_out.video.i_visible_height),
-                };
-                result = CALL_CUDA_DEC(cuMemcpy2DAsync, &cu_cpy, 0);
-                if (unlikely(result != VLC_SUCCESS))
-                    goto error;
-
-                srcY += picctx->bufferHeight;
-                dstY += p_sys->decoderHeight;
-            }
-        }
-        else
+        const vlc_chroma_description_t *desc = p_sys->chroma_desc;
+        assert(desc != NULL);
+        for (unsigned i_plane = 0; i_plane < desc->plane_count; i_plane++)
         {
-            for (int i_plane = 0; i_plane < 2; i_plane++) {
-                CUDA_MEMCPY2D cu_cpy = {
-                    .srcMemoryType  = CU_MEMORYTYPE_DEVICE,
-                    .srcDevice      = frameDevicePtr,
-                    .srcY           = srcY,
-                    .srcPitch       = i_pitch,
-                    .dstMemoryType  = CU_MEMORYTYPE_DEVICE,
-                    .dstDevice      = picctx->devicePtr,
-                    .dstPitch       = picctx->bufferPitch,
-                    .dstY           = dstY,
-                    .WidthInBytes   = i_pitch,
-                    .Height         = __MIN(picctx->bufferHeight, p_dec->fmt_out.video.i_y_offset + p_dec->fmt_out.video.i_visible_height),
-                };
-                if (i_plane == 1)
-                    cu_cpy.Height >>= 1;
-                result = CALL_CUDA_DEC(cuMemcpy2DAsync, &cu_cpy, 0);
-                if (unlikely(result != VLC_SUCCESS))
-                    goto error;
+            CUDA_MEMCPY2D cu_cpy = {
+                .srcMemoryType  = CU_MEMORYTYPE_DEVICE,
+                .srcDevice      = frameDevicePtr,
+                .srcY           = srcY,
+                .srcPitch       = i_pitch,
+                .dstMemoryType  = CU_MEMORYTYPE_DEVICE,
+                .dstDevice      = picctx->devicePtr,
+                .dstPitch       = picctx->bufferPitch,
+                .dstY           = dstY,
+                .WidthInBytes   = i_pitch,
+                .Height         = __MIN(picctx->bufferHeight, p_dec->fmt_out.video.i_y_offset + p_dec->fmt_out.video.i_visible_height),
+            };
+            cu_cpy.Height = cu_cpy.Height * desc->p[i_plane].h.num / desc->p[i_plane].h.den;
+            result = CALL_CUDA_DEC(cuMemcpy2DAsync, &cu_cpy, 0);
+            if (unlikely(result != VLC_SUCCESS))
+                goto error;
 
-                srcY += picctx->bufferHeight;
-                dstY += p_sys->decoderHeight;
-            }
+            srcY += picctx->bufferHeight;
+            dstY += p_sys->decoderHeight;
         }
     }
     else


=====================================
modules/hw/nvdec/nvdec_gl.c
=====================================
@@ -47,6 +47,8 @@ typedef struct {
     CUgraphicsResource cu_res[PICTURE_PLANE_MAX]; // Y, UV for NV12/P010
     CUarray mappedArray[PICTURE_PLANE_MAX];
 
+    unsigned pixel_size;
+
     struct {
         PFNGLBINDTEXTUREPROC BindTexture;
         PFNGLGETERRORPROC GetError;
@@ -122,11 +124,9 @@ tc_nvdec_gl_update(const struct vlc_gl_interop *interop, uint32_t textures[],
             .srcY           = srcY,
             .dstMemoryType = CU_MEMORYTYPE_ARRAY,
             .dstArray = p_sys->mappedArray[i],
-            .WidthInBytes = tex_widths[0],
+            .WidthInBytes = tex_widths[0] * p_sys->pixel_size,
             .Height = tex_heights[i],
         };
-        if (interop->fmt_in.i_chroma != VLC_CODEC_NVDEC_OPAQUE && interop->fmt_in.i_chroma != VLC_CODEC_NVDEC_OPAQUE_444)
-            cu_cpy.WidthInBytes *= 2;
         result = CALL_CUDA(cuMemcpy2DAsync, &cu_cpy, 0);
         if (result != VLC_SUCCESS)
             goto error;
@@ -207,6 +207,13 @@ static int Open(struct vlc_gl_interop *interop)
     video_format_TransformBy(&interop->fmt_out, TRANSFORM_VFLIP);
 
     vlc_fourcc_t render_chroma = NVDECToVlcChroma(interop->fmt_in.i_chroma);
+    const vlc_chroma_description_t *desc = vlc_fourcc_GetChromaDescription(render_chroma);
+    if (desc == NULL)
+    {
+        vlc_decoder_device_Release(device);
+        return VLC_EGENERIC;
+    }
+    p_sys->pixel_size = desc->pixel_size;
     switch (render_chroma)
     {
         case VLC_CODEC_P010:



View it on GitLab: 
https://code.videolan.org/videolan/vlc/-/compare/0bd45a13ccaddf0ef1e5e69c2a47aec5f14cf2de...910a544aa6e5252266398cc911c2c9399766df8a

-- 
View it on GitLab: 
https://code.videolan.org/videolan/vlc/-/compare/0bd45a13ccaddf0ef1e5e69c2a47aec5f14cf2de...910a544aa6e5252266398cc911c2c9399766df8a
You're receiving this email because of your account on code.videolan.org.


VideoLAN code repository instance
_______________________________________________
vlc-commits mailing list
[email protected]
https://mailman.videolan.org/listinfo/vlc-commits

Reply via email to