PR #21000 opened by Lynne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21000
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/21000.patch

The format is raw and uncompressed; however, unpacking it amounts to a memcpy,
and with big 6k or even 12k frames, each several hundred megabytes large, the
required bandwidth can be far too much for CPUs.
Also, most DPX formats cannot be uploaded directly without a software
conversion, since almost nothing supports 3-component image formats like RGB48,
and 10 and 12-bit formats are stored either tightly packed or in 32-bit dwords.

Speedup over software is around 3x for Intel, 6x for AMD, and 185x (!) for
Nvidia. You can use the [small program I
wrote](https://github.com/cyanreg/dec_tx_test) to benchmark it, as well as
encoding.

Nvidia hardware seems to really suck at uploading or downloading anything;
host image copies seem to be the only fast way to do so, so we exploit them a
bit here. They're not yet stable enough for all uploads, but we'll get there.


>From 41c50176c70ae80b9e49ddb89555acf4da6256cf Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Thu, 13 Nov 2025 12:09:11 +0100
Subject: [PATCH 01/11] hwcontext_vulkan: enable runtime descriptor sizing

We were already using this in places, but it seems validation
layers finally got support to detect it.
---
 libavutil/hwcontext_vulkan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index a6bf9a590b..0408b9c117 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -307,6 +307,7 @@ static void 
device_features_copy_needed(VulkanDeviceFeatures *dst, VulkanDeviceF
     COPY_VAL(vulkan_1_2.vulkanMemoryModel);
     COPY_VAL(vulkan_1_2.vulkanMemoryModelDeviceScope);
     COPY_VAL(vulkan_1_2.uniformBufferStandardLayout);
+    COPY_VAL(vulkan_1_2.runtimeDescriptorArray);
 
     COPY_VAL(vulkan_1_3.dynamicRendering);
     COPY_VAL(vulkan_1_3.maintenance4);
-- 
2.49.1


>From 8e0adcb97f11f3f769d4e44eaf6ef291d43c25f4 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Thu, 20 Nov 2025 01:36:07 +0100
Subject: [PATCH 02/11] hwcontext_vulkan: enable host image copy but disable
 its automatic use

This keeps the current behaviour and allows host image copy to be used
in more direct ways.
---
 libavutil/hwcontext_vulkan.c | 22 ++--------------------
 1 file changed, 2 insertions(+), 20 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 0408b9c117..2834d45c41 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -656,6 +656,7 @@ static const VulkanOptExtension optional_device_exts[] = {
     { VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME,               
FF_VK_EXT_COOP_MATRIX            },
     { VK_EXT_SHADER_OBJECT_EXTENSION_NAME,                    
FF_VK_EXT_SHADER_OBJECT          },
     { VK_KHR_SHADER_SUBGROUP_ROTATE_EXTENSION_NAME,           
FF_VK_EXT_SUBGROUP_ROTATE        },
+    { VK_EXT_HOST_IMAGE_COPY_EXTENSION_NAME,                  
FF_VK_EXT_HOST_IMAGE_COPY        },
 #ifdef VK_EXT_zero_initialize_device_memory
     { VK_EXT_ZERO_INITIALIZE_DEVICE_MEMORY_EXTENSION_NAME,    
FF_VK_EXT_ZERO_INITIALIZE        },
 #endif
@@ -1707,25 +1708,6 @@ static void vulkan_device_uninit(AVHWDeviceContext *ctx)
     ff_vk_uninit(&p->vkctx);
 }
 
-static int vulkan_device_has_rebar(AVHWDeviceContext *ctx)
-{
-    VulkanDevicePriv *p = ctx->hwctx;
-    VkDeviceSize max_vram = 0, max_visible_vram = 0;
-
-    /* Get device memory properties */
-    for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
-        const VkMemoryType type = p->mprops.memoryTypes[i];
-        const VkMemoryHeap heap = p->mprops.memoryHeaps[type.heapIndex];
-        if (!(type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
-            continue;
-        max_vram = FFMAX(max_vram, heap.size);
-        if (type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
-            max_visible_vram = FFMAX(max_visible_vram, heap.size);
-    }
-
-    return max_vram - max_visible_vram < 1024; /* 1 kB tolerance */
-}
-
 static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
                                          VulkanDeviceSelection *dev_select,
                                          int disable_multiplane,
@@ -2079,7 +2061,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
     vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
 
     /* Only use host image transfers if ReBAR is enabled */
-    p->disable_host_transfer = !vulkan_device_has_rebar(ctx);
+    p->disable_host_transfer = 1;
 
 end:
     av_free(qf_vid);
-- 
2.49.1


>From 056a0bb269b1f35409946cd63d46cb7a41bcc3bd Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Wed, 5 Nov 2025 10:31:18 +0100
Subject: [PATCH 03/11] vulkan/common: add a function to flush/invalidate a
 buffer and use it

Just for convenience.
---
 libavutil/hwcontext_vulkan.c | 45 ++++++++++++------------------------
 libavutil/vulkan.c           | 31 +++++++++++++++++++++++++
 libavutil/vulkan.h           |  7 ++++++
 3 files changed, 53 insertions(+), 30 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 2834d45c41..13332c1b5b 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -4154,29 +4154,10 @@ static int copy_buffer_data(AVHWFramesContext *hwfc, 
AVBufferRef *buf,
                             AVFrame *swf, VkBufferImageCopy *region,
                             int planes, int upload)
 {
-    VkResult ret;
+    int err;
     VulkanDevicePriv *p = hwfc->device_ctx->hwctx;
-    FFVulkanFunctions *vk = &p->vkctx.vkfn;
-    AVVulkanDeviceContext *hwctx = &p->p;
-
     FFVkBuffer *vkbuf = (FFVkBuffer *)buf->data;
 
-    const VkMappedMemoryRange flush_info = {
-        .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
-        .memory = vkbuf->mem,
-        .size   = VK_WHOLE_SIZE,
-    };
-
-    if (!upload && !(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
-        ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev, 1,
-                                               &flush_info);
-        if (ret != VK_SUCCESS) {
-            av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate buffer data: 
%s\n",
-                   ff_vk_ret2str(ret));
-            return AVERROR_EXTERNAL;
-        }
-    }
-
     if (upload) {
         for (int i = 0; i < planes; i++)
             av_image_copy_plane(vkbuf->mapped_mem + region[i].bufferOffset,
@@ -4185,7 +4166,21 @@ static int copy_buffer_data(AVHWFramesContext *hwfc, 
AVBufferRef *buf,
                                 swf->linesize[i],
                                 swf->linesize[i],
                                 region[i].imageExtent.height);
+
+        err = ff_vk_flush_buffer(&p->vkctx, vkbuf, 0, VK_WHOLE_SIZE, 1);
+        if (err != VK_SUCCESS) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to flush buffer data: %s\n",
+                   av_err2str(err));
+            return AVERROR_EXTERNAL;
+        }
     } else {
+        err = ff_vk_flush_buffer(&p->vkctx, vkbuf, 0, VK_WHOLE_SIZE, 0);
+        if (err != VK_SUCCESS) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate buffer data: 
%s\n",
+                   av_err2str(err));
+            return AVERROR_EXTERNAL;
+        }
+
         for (int i = 0; i < planes; i++)
             av_image_copy_plane(swf->data[i],
                                 swf->linesize[i],
@@ -4195,16 +4190,6 @@ static int copy_buffer_data(AVHWFramesContext *hwfc, 
AVBufferRef *buf,
                                 region[i].imageExtent.height);
     }
 
-    if (upload && !(vkbuf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
-        ret = vk->FlushMappedMemoryRanges(hwctx->act_dev, 1,
-                                          &flush_info);
-        if (ret != VK_SUCCESS) {
-            av_log(hwfc, AV_LOG_ERROR, "Failed to flush buffer data: %s\n",
-                   ff_vk_ret2str(ret));
-            return AVERROR_EXTERNAL;
-        }
-    }
-
     return 0;
 }
 
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 54448a32e5..c7f86d524c 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1167,6 +1167,37 @@ int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer 
**buf, uint8_t *mem[],
     return 0;
 }
 
+int ff_vk_flush_buffer(FFVulkanContext *s, FFVkBuffer *buf,
+                       size_t offset, size_t mem_size,
+                       int flush)
+{
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+
+    if (buf->host_ref || buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+        return 0;
+
+    const VkMappedMemoryRange flush_data = {
+        .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+        .memory = buf->mem,
+        .offset = offset,
+        .size   = mem_size,
+    };
+
+    if (flush)
+        ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, 1, &flush_data);
+    else
+        ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, 1, 
&flush_data);
+
+    if (ret != VK_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
 int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers,
                         int flush)
 {
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index e1c9a5792f..bdc20e4645 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -523,6 +523,13 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, 
size_t size,
                      void *pNext, void *alloc_pNext,
                      VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
 
+/**
+ * Flush or invalidate a single buffer, with a given size and offset.
+ */
+int ff_vk_flush_buffer(FFVulkanContext *s, FFVkBuffer *buf,
+                       size_t offset, size_t mem_size,
+                       int flush);
+
 /**
  * Buffer management code.
  */
-- 
2.49.1


>From d0c01040052b0f3187727a9d7094ccbdc964ea02 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Tue, 18 Nov 2025 12:09:51 +0100
Subject: [PATCH 04/11] vulkan/common: add reverse2 endian reversal macro

---
 libavcodec/vulkan/common.comp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp
index 6825693fa3..eda92ce28d 100644
--- a/libavcodec/vulkan/common.comp
+++ b/libavcodec/vulkan/common.comp
@@ -79,6 +79,9 @@ uint64_t align64(uint64_t src, uint64_t a)
     return src + a - res;
 }
 
+#define reverse2(src) \
+    (pack16(unpack8(uint16_t(src)).yx))
+
 #define reverse4(src) \
     (pack32(unpack8(uint32_t(src)).wzyx))
 
-- 
2.49.1


>From 43929dfed57249f14ead92e9cd3341c34a190b24 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Thu, 20 Nov 2025 18:09:17 +0100
Subject: [PATCH 05/11] ffv1enc: add entries for X2BGR10/X2RGB10

They are output by Vulkan decoders, and are also a very common
display surface format.
---
 libavcodec/ffv1enc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index 8e5ebe773c..5daa3aa0cd 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -914,6 +914,8 @@ av_cold int ff_ffv1_encode_setup_plane_info(AVCodecContext 
*avctx,
     case AV_PIX_FMT_GBRP9:
         if (!avctx->bits_per_raw_sample)
             s->bits_per_raw_sample = 9;
+    case AV_PIX_FMT_X2BGR10:
+    case AV_PIX_FMT_X2RGB10:
     case AV_PIX_FMT_GBRP10:
     case AV_PIX_FMT_GBRAP10:
         if (!avctx->bits_per_raw_sample && !s->bits_per_raw_sample)
-- 
2.49.1


>From a4e99b52d565558a9dd0f7839ee7fd595b0d795e Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Sat, 22 Nov 2025 13:13:43 +0100
Subject: [PATCH 06/11] vulkan_ffv1/prores: remove unnecessary slice buffer
 unref

The slice buffer is already unref'd by ff_vk_decode_free_frame().
---
 libavcodec/vulkan_ffv1.c   | 1 -
 libavcodec/vulkan_prores.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
index 1ed9d7dd6c..66659e0069 100644
--- a/libavcodec/vulkan_ffv1.c
+++ b/libavcodec/vulkan_ffv1.c
@@ -1148,7 +1148,6 @@ static void vk_ffv1_free_frame_priv(AVRefStructOpaque 
_hwctx, void *data)
                    i, status, crc_res);
     }
 
-    av_buffer_unref(&vp->slices_buf);
     av_buffer_unref(&fp->slice_state);
     av_buffer_unref(&fp->slice_offset_buf);
     av_buffer_unref(&fp->slice_status_buf);
diff --git a/libavcodec/vulkan_prores.c b/libavcodec/vulkan_prores.c
index 42e8fa0b06..edab4f8564 100644
--- a/libavcodec/vulkan_prores.c
+++ b/libavcodec/vulkan_prores.c
@@ -577,7 +577,6 @@ static void vk_prores_free_frame_priv(AVRefStructOpaque 
_hwctx, void *data)
 
     ff_vk_decode_free_frame(dev_ctx, &pp->vp);
 
-    av_buffer_unref(&vp->slices_buf);
     av_buffer_unref(&pp->metadata_buf);
 }
 
-- 
2.49.1


>From 982ec02c7ca278140e9612093e1ec05b7de25687 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Sat, 22 Nov 2025 22:37:17 +0100
Subject: [PATCH 07/11] vulkan_ffv1: initialize only the necessary shaders on
 init

The decoder will reinit the hwaccel upon pixfmt/dimension changes,
so we can remove the f->use32bit and is_rgb variants of all shaders.

This speeds up init time.
---
 libavcodec/vulkan_ffv1.c | 91 ++++++++++++++++++----------------------
 1 file changed, 40 insertions(+), 51 deletions(-)

diff --git a/libavcodec/vulkan_ffv1.c b/libavcodec/vulkan_ffv1.c
index 66659e0069..430054cbe1 100644
--- a/libavcodec/vulkan_ffv1.c
+++ b/libavcodec/vulkan_ffv1.c
@@ -59,11 +59,11 @@ typedef struct FFv1VulkanDecodePicture {
 } FFv1VulkanDecodePicture;
 
 typedef struct FFv1VulkanDecodeContext {
-    AVBufferRef *intermediate_frames_ref[2]; /* 16/32 bit */
+    AVBufferRef *intermediate_frames_ref;
 
     FFVulkanShader setup;
     FFVulkanShader reset[2]; /* AC/Golomb */
-    FFVulkanShader decode[2][2][2]; /* 16/32 bit, AC/Golomb, Normal/RGB */
+    FFVulkanShader decode[2]; /* AC/Golomb */
 
     FFVkBuffer rangecoder_static_buf;
     FFVkBuffer quant_buf;
@@ -239,7 +239,7 @@ static int vk_ffv1_start_frame(AVCodecContext          
*avctx,
         if (!vp->dpb_frame)
             return AVERROR(ENOMEM);
 
-        err = av_hwframe_get_buffer(fv->intermediate_frames_ref[f->use32bit],
+        err = av_hwframe_get_buffer(fv->intermediate_frames_ref,
                                     vp->dpb_frame, 0);
         if (err < 0)
             return err;
@@ -527,7 +527,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
                     f->plane_count);
 
     /* Decode */
-    decode_shader = &fv->decode[f->use32bit][f->ac == AC_GOLOMB_RICE][is_rgb];
+    decode_shader = &fv->decode[f->ac == AC_GOLOMB_RICE];
     ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader,
                                     1, 0, 0,
                                     slice_state,
@@ -823,7 +823,7 @@ static int init_decode_shader(FFV1Context *f, 
FFVulkanContext *s,
                               FFVulkanShader *shd,
                               AVHWFramesContext *dec_frames_ctx,
                               AVHWFramesContext *out_frames_ctx,
-                              int use32bit, int ac, int rgb)
+                              int ac, int rgb)
 {
     int err;
     FFVulkanDescriptorSetBinding *desc_set;
@@ -831,6 +831,7 @@ static int init_decode_shader(FFV1Context *f, 
FFVulkanContext *s,
     uint8_t *spv_data;
     size_t spv_len;
     void *spv_opaque = NULL;
+
     int use_cached_reader = ac != AC_GOLOMB_RICE &&
                             s->driver_props.driverID == VK_DRIVER_ID_MESA_RADV;
 
@@ -879,7 +880,7 @@ static int init_decode_shader(FFV1Context *f, 
FFVulkanContext *s,
 
     RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0));
 
-    define_shared_code(shd, use32bit);
+    define_shared_code(shd, f->use32bit);
     if (ac == AC_GOLOMB_RICE)
         GLSLD(ff_source_ffv1_vlc_comp);
 
@@ -977,16 +978,12 @@ static void vk_decode_ffv1_uninit(FFVulkanDecodeShared 
*ctx)
 
     ff_vk_shader_free(&ctx->s, &fv->setup);
 
-    for (int i = 0; i < 2; i++) /* 16/32 bit */
-        av_buffer_unref(&fv->intermediate_frames_ref[i]);
+    av_buffer_unref(&fv->intermediate_frames_ref);
 
-    for (int i = 0; i < 2; i++) /* AC/Golomb */
+    for (int i = 0; i < 2; i++) { /* AC/Golomb */
         ff_vk_shader_free(&ctx->s, &fv->reset[i]);
-
-    for (int i = 0; i < 2; i++) /* 16/32 bit */
-        for (int j = 0; j < 2; j++) /* AC/Golomb */
-            for (int k = 0; k < 2; k++) /* Normal/RGB */
-                ff_vk_shader_free(&ctx->s, &fv->decode[i][j][k]);
+        ff_vk_shader_free(&ctx->s, &fv->decode[i]);
+    }
 
     ff_vk_free_buf(&ctx->s, &fv->quant_buf);
     ff_vk_free_buf(&ctx->s, &fv->rangecoder_static_buf);
@@ -1008,6 +1005,11 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
     FFv1VulkanDecodeContext *fv;
     FFVkSPIRVCompiler *spv;
 
+    AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+    enum AVPixelFormat sw_format = hwfc->sw_format;
+    int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
+                 !(sw_format == AV_PIX_FMT_YA8);
+
     if (f->version < 3 ||
         (f->version == 4 && f->micro_version > 3))
         return AVERROR(ENOTSUP);
@@ -1032,36 +1034,27 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
     ctx->sd_ctx_free = &vk_decode_ffv1_uninit;
 
     /* Intermediate frame pool for RCT */
-    for (int i = 0; i < 2; i++) { /* 16/32 bit */
-        RET(init_indirect(avctx, &ctx->s, &fv->intermediate_frames_ref[i],
-                          i ? AV_PIX_FMT_GBRAP32 : AV_PIX_FMT_GBRAP16));
-    }
+    RET(init_indirect(avctx, &ctx->s, &fv->intermediate_frames_ref,
+                      f->use32bit ? AV_PIX_FMT_GBRAP32 : AV_PIX_FMT_GBRAP16));
 
     /* Setup shader */
     RET(init_setup_shader(f, &ctx->s, &ctx->exec_pool, spv, &fv->setup));
 
-    /* Reset shaders */
     for (int i = 0; i < 2; i++) { /* AC/Golomb */
+        /* Reset shaders */
         RET(init_reset_shader(f, &ctx->s, &ctx->exec_pool,
                               spv, &fv->reset[i], !i ? AC_RANGE_CUSTOM_TAB : 
0));
-    }
 
-    /* Decode shaders */
-    for (int i = 0; i < 2; i++) { /* 16/32 bit */
-        for (int j = 0; j < 2; j++) { /* AC/Golomb */
-            for (int k = 0; k < 2; k++) { /* Normal/RGB */
-                AVHWFramesContext *dec_frames_ctx;
-                dec_frames_ctx = k ? (AVHWFramesContext 
*)fv->intermediate_frames_ref[i]->data :
-                                     (AVHWFramesContext 
*)avctx->hw_frames_ctx->data;
-                RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool,
-                                       spv, &fv->decode[i][j][k],
-                                       dec_frames_ctx,
-                                       (AVHWFramesContext 
*)avctx->hw_frames_ctx->data,
-                                       i,
-                                       !j ? AC_RANGE_CUSTOM_TAB : 
AC_GOLOMB_RICE,
-                                       k));
-            }
-        }
+        /* Decode shaders */
+        AVHWFramesContext *dctx;
+        dctx = is_rgb ? (AVHWFramesContext *)fv->intermediate_frames_ref->data 
:
+                        hwfc;
+        RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool,
+                               spv, &fv->decode[i],
+                               dctx,
+                               (AVHWFramesContext *)avctx->hw_frames_ctx->data,
+                               !i ? AC_RANGE_CUSTOM_TAB : AC_GOLOMB_RICE,
+                               is_rgb));
     }
 
     /* Range coder data */
@@ -1092,21 +1085,17 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
                                         VK_FORMAT_UNDEFINED));
 
     /* Update decode global descriptors */
-    for (int i = 0; i < 2; i++) { /* 16/32 bit */
-        for (int j = 0; j < 2; j++) { /* AC/Golomb */
-            for (int k = 0; k < 2; k++) { /* Normal/RGB */
-                RET(ff_vk_shader_update_desc_buffer(&ctx->s, 
&ctx->exec_pool.contexts[0],
-                                                    &fv->decode[i][j][k], 0, 
0, 0,
-                                                    &fv->rangecoder_static_buf,
-                                                    0, 
fv->rangecoder_static_buf.size,
-                                                    VK_FORMAT_UNDEFINED));
-                RET(ff_vk_shader_update_desc_buffer(&ctx->s, 
&ctx->exec_pool.contexts[0],
-                                                    &fv->decode[i][j][k], 0, 
1, 0,
-                                                    &fv->quant_buf,
-                                                    0, fv->quant_buf.size,
-                                                    VK_FORMAT_UNDEFINED));
-            }
-        }
+    for (int i = 0; i < 2; i++) { /* AC/Golomb */
+        RET(ff_vk_shader_update_desc_buffer(&ctx->s, 
&ctx->exec_pool.contexts[0],
+                                            &fv->decode[i], 0, 0, 0,
+                                            &fv->rangecoder_static_buf,
+                                            0, fv->rangecoder_static_buf.size,
+                                            VK_FORMAT_UNDEFINED));
+        RET(ff_vk_shader_update_desc_buffer(&ctx->s, 
&ctx->exec_pool.contexts[0],
+                                            &fv->decode[i], 0, 1, 0,
+                                            &fv->quant_buf,
+                                            0, fv->quant_buf.size,
+                                            VK_FORMAT_UNDEFINED));
     }
 
 fail:
-- 
2.49.1


>From 39844692f9a93b4e86bbc2eb1e146c22b2e29d92 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Sun, 9 Nov 2025 10:52:20 +0100
Subject: [PATCH 08/11] dpx: add a context

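This simply adds a decoder context holding the parsed header state
(endianness, packing, component count, stride, row alignment and the
unpadded 10-bit flag) to enable hardware unpacking.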
---
 libavcodec/dpx.c | 141 ++++++++++++++++++++++++-----------------------
 libavcodec/dpx.h |  13 +++++
 2 files changed, 86 insertions(+), 68 deletions(-)

diff --git a/libavcodec/dpx.c b/libavcodec/dpx.c
index 1b1ada316a..c8981bbf3a 100644
--- a/libavcodec/dpx.c
+++ b/libavcodec/dpx.c
@@ -121,6 +121,8 @@ static uint16_t read12in32(const uint8_t **ptr, uint32_t 
*lbuf,
 static int decode_frame(AVCodecContext *avctx, AVFrame *p,
                         int *got_frame, AVPacket *avpkt)
 {
+    DPXDecContext *dpx = avctx->priv_data;
+
     const uint8_t *buf = avpkt->data;
     int buf_size       = avpkt->size;
     uint8_t *ptr[AV_NUM_DATA_POINTERS];
@@ -129,11 +131,11 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
     char input_device[33] = { 0 };
 
     unsigned int offset;
-    int magic_num, endian;
-    int x, y, stride, i, j, ret;
-    int w, h, bits_per_color, descriptor, elements, packing;
+    int magic_num;
+    int x, y, i, j, ret;
+    int w, h, descriptor;
     int yuv, color_trc, color_spec;
-    int encoding, need_align = 0, unpadded_10bit = 0;
+    int encoding;
 
     unsigned int rgbBuffer = 0;
     int n_datum = 0;
@@ -149,15 +151,15 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
     /* Check if the files "magic number" is "SDPX" which means it uses
      * big-endian or XPDS which is for little-endian files */
     if (magic_num == AV_RL32("SDPX")) {
-        endian = 0;
+        dpx->endian = 0;
     } else if (magic_num == AV_RB32("SDPX")) {
-        endian = 1;
+        dpx->endian = 1;
     } else {
         av_log(avctx, AV_LOG_ERROR, "DPX marker not found\n");
         return AVERROR_INVALIDDATA;
     }
 
-    offset = read32(&buf, endian);
+    offset = read32(&buf, dpx->endian);
     if (avpkt->size <= offset) {
         av_log(avctx, AV_LOG_ERROR, "Invalid data start offset\n");
         return AVERROR_INVALIDDATA;
@@ -174,7 +176,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
 
     // Check encryption
     buf = avpkt->data + 660;
-    ret = read32(&buf, endian);
+    ret = read32(&buf, dpx->endian);
     if (ret != 0xFFFFFFFF) {
         avpriv_report_missing_feature(avctx, "Encryption");
         av_log(avctx, AV_LOG_WARNING, "The image is encrypted and may "
@@ -183,8 +185,8 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
 
     // Need to end in 0x304 offset from start of file
     buf = avpkt->data + 0x304;
-    w = read32(&buf, endian);
-    h = read32(&buf, endian);
+    w = read32(&buf, dpx->endian);
+    h = read32(&buf, dpx->endian);
 
     if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
         return ret;
@@ -197,23 +199,22 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
 
     // Need to end in 0x323 to read the bits per color
     buf += 3;
-    avctx->bits_per_raw_sample =
-    bits_per_color = buf[0];
+    avctx->bits_per_raw_sample = buf[0];
     buf++;
-    packing = read16(&buf, endian);
-    encoding = read16(&buf, endian);
+    dpx->packing = read16(&buf, dpx->endian);
+    encoding = read16(&buf, dpx->endian);
 
     if (encoding) {
         avpriv_report_missing_feature(avctx, "Encoding %d", encoding);
         return AVERROR_PATCHWELCOME;
     }
 
-    if (bits_per_color > 31)
+    if (avctx->bits_per_raw_sample > 31)
         return AVERROR_INVALIDDATA;
 
     buf += 820;
-    avctx->sample_aspect_ratio.num = read32(&buf, endian);
-    avctx->sample_aspect_ratio.den = read32(&buf, endian);
+    avctx->sample_aspect_ratio.num = read32(&buf, dpx->endian);
+    avctx->sample_aspect_ratio.den = read32(&buf, dpx->endian);
     if (avctx->sample_aspect_ratio.num > 0 && avctx->sample_aspect_ratio.den > 
0)
         av_reduce(&avctx->sample_aspect_ratio.num, 
&avctx->sample_aspect_ratio.den,
                    avctx->sample_aspect_ratio.num,  
avctx->sample_aspect_ratio.den,
@@ -224,7 +225,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
     /* preferred frame rate from Motion-picture film header */
     if (offset >= 1724 + 4) {
         buf = avpkt->data + 1724;
-        i = read32(&buf, endian);
+        i = read32(&buf, dpx->endian);
         if(i && i != 0xFFFFFFFF) {
             AVRational q = av_d2q(av_int2float(i), 4096);
             if (q.num > 0 && q.den > 0)
@@ -236,7 +237,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
     if (offset >= 1940 + 4 &&
         !(avctx->framerate.num && avctx->framerate.den)) {
         buf = avpkt->data + 1940;
-        i = read32(&buf, endian);
+        i = read32(&buf, dpx->endian);
         if(i && i != 0xFFFFFFFF) {
             AVRational q = av_d2q(av_int2float(i), 4096);
             if (q.num > 0 && q.den > 0)
@@ -253,7 +254,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
         buf = avpkt->data + 1920;
         // read32 to native endian, av_bswap32 to opposite of native for
         // compatibility with av_timecode_make_smpte_tc_string2 etc
-        tc = av_bswap32(read32(&buf, endian));
+        tc = av_bswap32(read32(&buf, dpx->endian));
 
         if (i != 0xFFFFFFFF) {
             AVFrameSideData *tcside;
@@ -277,21 +278,21 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
     /* color range from television header */
     if (offset >= 1964 + 4) {
         buf = avpkt->data + 1952;
-        i = read32(&buf, endian);
+        i = read32(&buf, dpx->endian);
 
         buf = avpkt->data + 1964;
-        j = read32(&buf, endian);
+        j = read32(&buf, dpx->endian);
 
         if (i != 0xFFFFFFFF && j != 0xFFFFFFFF) {
             float minCV, maxCV;
             minCV = av_int2float(i);
             maxCV = av_int2float(j);
-            if (bits_per_color >= 1 &&
-                minCV == 0.0f && maxCV == ((1U<<bits_per_color) - 1)) {
+            if (avctx->bits_per_raw_sample >= 1 &&
+                minCV == 0.0f && maxCV == ((1U<<avctx->bits_per_raw_sample) - 
1)) {
                 avctx->color_range = AVCOL_RANGE_JPEG;
-            } else if (bits_per_color >= 8 &&
-                       minCV == (1  <<(bits_per_color - 4)) &&
-                       maxCV == (235<<(bits_per_color - 8))) {
+            } else if (avctx->bits_per_raw_sample >= 8 &&
+                       minCV == (1  <<(avctx->bits_per_raw_sample - 4)) &&
+                       maxCV == (235<<(avctx->bits_per_raw_sample - 8))) {
                 avctx->color_range = AVCOL_RANGE_MPEG;
             }
         }
@@ -303,28 +304,28 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
     case 3:  // B
     case 4:  // A
     case 6:  // Y
-        elements = 1;
+        dpx->components = 1;
         yuv = 1;
         break;
     case 50: // RGB
-        elements = 3;
+        dpx->components = 3;
         yuv = 0;
         break;
     case 52: // ABGR
     case 51: // RGBA
-        elements = 4;
+        dpx->components = 4;
         yuv = 0;
         break;
     case 100: // UYVY422
-        elements = 2;
+        dpx->components = 2;
         yuv = 1;
         break;
     case 102: // UYV444
-        elements = 3;
+        dpx->components = 3;
         yuv = 1;
         break;
     case 103: // UYVA4444
-        elements = 4;
+        dpx->components = 4;
         yuv = 1;
         break;
     default:
@@ -332,40 +333,40 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
         return AVERROR_PATCHWELCOME;
     }
 
-    switch (bits_per_color) {
+    switch (avctx->bits_per_raw_sample) {
     case 8:
-        stride = avctx->width * elements;
+        dpx->stride = avctx->width * dpx->components;
         break;
     case 10:
-        if (!packing) {
+        if (!dpx->packing) {
             av_log(avctx, AV_LOG_ERROR, "Packing to 32bit required\n");
             return -1;
         }
-        stride = (avctx->width * elements + 2) / 3 * 4;
+        dpx->stride = (avctx->width * dpx->components + 2) / 3 * 4;
         break;
     case 12:
-        stride = avctx->width * elements;
-        if (packing) {
-            stride *= 2;
+        dpx->stride = avctx->width * dpx->components;
+        if (dpx->packing) {
+            dpx->stride *= 2;
         } else {
-            stride *= 3;
-            if (stride % 8) {
-                stride /= 8;
-                stride++;
-                stride *= 8;
+            dpx->stride *= 3;
+            if (dpx->stride % 8) {
+                dpx->stride /= 8;
+                dpx->stride++;
+                dpx->stride *= 8;
             }
-            stride /= 2;
+            dpx->stride /= 2;
         }
         break;
     case 16:
-        stride = 2 * avctx->width * elements;
+        dpx->stride = 2 * avctx->width * dpx->components;
         break;
     case 32:
-        stride = 4 * avctx->width * elements;
+        dpx->stride = 4 * avctx->width * dpx->components;
         break;
     case 1:
     case 64:
-        avpriv_report_missing_feature(avctx, "Depth %d", bits_per_color);
+        avpriv_report_missing_feature(avctx, "Depth %d", 
avctx->bits_per_raw_sample);
         return AVERROR_PATCHWELCOME;
     default:
         return AVERROR_INVALIDDATA;
@@ -458,8 +459,8 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
     // Some devices do not pad 10bit samples to whole 32bit words per row
     if (!memcmp(input_device, "Scanity", 7) ||
         !memcmp(creator, "Lasergraphics Inc.", 18)) {
-        if (bits_per_color == 10)
-            unpadded_10bit = 1;
+        if (avctx->bits_per_raw_sample == 10)
+            dpx->unpadded_10bit = 1;
     }
 
     // Table 3c: Runs will always break at scan line boundaries. Packing
@@ -467,24 +468,24 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
     // Unfortunately, the encoder produced invalid files, so attempt
     // to detect it
     // Also handle special case with unpadded content
-    need_align = FFALIGN(stride, 4);
-    if (need_align*avctx->height + (int64_t)offset > avpkt->size &&
-        (!unpadded_10bit || (avctx->width * avctx->height * elements + 2) / 3 
* 4 + (int64_t)offset > avpkt->size)) {
+    dpx->need_align = FFALIGN(dpx->stride, 4);
+    if (dpx->need_align*avctx->height + (int64_t)offset > avpkt->size &&
+        (!dpx->unpadded_10bit || (avctx->width * avctx->height * 
dpx->components + 2) / 3 * 4 + (int64_t)offset > avpkt->size)) {
         // Alignment seems unappliable, try without
-        if (stride*avctx->height + (int64_t)offset > avpkt->size || 
unpadded_10bit) {
+        if (dpx->stride*avctx->height + (int64_t)offset > avpkt->size || 
dpx->unpadded_10bit) {
             av_log(avctx, AV_LOG_ERROR, "Overread buffer. Invalid header?\n");
             return AVERROR_INVALIDDATA;
         } else {
             av_log(avctx, AV_LOG_INFO, "Decoding DPX without scanline "
                    "alignment.\n");
-            need_align = 0;
+            dpx->need_align = 0;
         }
     } else {
-        need_align -= stride;
-        stride = FFALIGN(stride, 4);
+        dpx->need_align -= dpx->stride;
+        dpx->stride = FFALIGN(dpx->stride, 4);
     }
 
-    switch (1000 * descriptor + 10 * bits_per_color + endian) {
+    switch (1000 * descriptor + 10 * avctx->bits_per_raw_sample + dpx->endian) 
{
     case 1081:
     case 1080:
     case 2081:
@@ -588,7 +589,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
         break;
     default:
         av_log(avctx, AV_LOG_ERROR, "Unsupported format %d\n",
-               1000 * descriptor + 10 * bits_per_color + endian);
+               1000 * descriptor + 10 * avctx->bits_per_raw_sample + 
dpx->endian);
         return AVERROR_PATCHWELCOME;
     }
 
@@ -599,18 +600,21 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
 
     // Move pointer to offset from start of file
     buf =  avpkt->data + offset;
+    dpx->frame = p;
 
+    int elements = dpx->components;
+    int endian = dpx->endian;
     for (i=0; i<AV_NUM_DATA_POINTERS; i++)
         ptr[i] = p->data[i];
 
-    switch (bits_per_color) {
+    switch (avctx->bits_per_raw_sample) {
     case 10:
         for (x = 0; x < avctx->height; x++) {
             uint16_t *dst[4] = {(uint16_t*)ptr[0],
                                 (uint16_t*)ptr[1],
                                 (uint16_t*)ptr[2],
                                 (uint16_t*)ptr[3]};
-            int shift = elements > 1 ? packing == 1 ? 22 : 20 : packing == 1 ? 
2 : 0;
+            int shift = elements > 1 ? dpx->packing == 1 ? 22 : 20 : 
dpx->packing == 1 ? 2 : 0;
             for (y = 0; y < avctx->width; y++) {
                 if (elements >= 3)
                     *dst[2]++ = read10in32(&buf, &rgbBuffer,
@@ -629,7 +633,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
                     read10in32(&buf, &rgbBuffer,
                                &n_datum, endian, shift);
             }
-            if (!unpadded_10bit)
+            if (!dpx->unpadded_10bit)
                 n_datum = 0;
             for (i = 0; i < elements; i++)
                 ptr[i] += p->linesize[i];
@@ -641,9 +645,9 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
                                 (uint16_t*)ptr[1],
                                 (uint16_t*)ptr[2],
                                 (uint16_t*)ptr[3]};
-            int shift = packing == 1 ? 4 : 0;
+            int shift = dpx->packing == 1 ? 4 : 0;
             for (y = 0; y < avctx->width; y++) {
-                if (packing) {
+                if (dpx->packing) {
                     if (elements >= 3)
                         *dst[2]++ = read16(&buf, endian) >> shift & 0xFFF;
                     *dst[0]++ = read16(&buf, endian) >> shift & 0xFFF;
@@ -669,13 +673,13 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
             for (i = 0; i < elements; i++)
                 ptr[i] += p->linesize[i];
             // Jump to next aligned position
-            buf += need_align;
+            buf += dpx->need_align;
         }
         break;
     case 32:
         if (elements == 1) {
             av_image_copy_plane(ptr[0], p->linesize[0],
-                                buf, stride,
+                                buf, dpx->stride,
                                 elements * avctx->width * 4, avctx->height);
         } else {
             for (y = 0; y < avctx->height; y++) {
@@ -722,7 +726,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
             }
         } else {
         av_image_copy_plane(ptr[0], p->linesize[0],
-                            buf, stride,
+                            buf, dpx->stride,
                             elements * avctx->width, avctx->height);
         }
         break;
@@ -736,6 +740,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
 const FFCodec ff_dpx_decoder = {
     .p.name         = "dpx",
     CODEC_LONG_NAME("DPX (Digital Picture Exchange) image"),
+    .priv_data_size = sizeof(DPXDecContext),
     .p.type         = AVMEDIA_TYPE_VIDEO,
     .p.id           = AV_CODEC_ID_DPX,
     FF_CODEC_DECODE_CB(decode_frame),
diff --git a/libavcodec/dpx.h b/libavcodec/dpx.h
index 800c651e5a..35e8aa690f 100644
--- a/libavcodec/dpx.h
+++ b/libavcodec/dpx.h
@@ -22,6 +22,8 @@
 #ifndef AVCODEC_DPX_H
 #define AVCODEC_DPX_H
 
+#include "libavutil/frame.h"
+
 enum DPX_TRC {
     DPX_TRC_USER_DEFINED       = 0,
     DPX_TRC_PRINTING_DENSITY   = 1,
@@ -54,4 +56,15 @@ enum DPX_COL_SPEC {
     /* 12 = N/A */
 };
 
+typedef struct DPXDecContext {
+    AVFrame *frame;
+
+    int packing;
+    int stride;
+    int endian; /* passed to the read helpers as their big-endian flag; last digit of the format code in decode_frame */
+    int components; /* number of elements (components) stored per pixel */
+    int unpadded_10bit; /* set for 10-bit files from devices that do not pad rows to whole 32-bit words */
+    int need_align; /* extra bytes skipped after each row; 0 when decoding without scanline alignment */
+} DPXDecContext; /* decoder private state filled in by decode_frame and consumed when unpacking samples */
+
 #endif /* AVCODEC_DPX_H */
-- 
2.49.1


>From 129e9311dec97e61608b5e8bd1bae2a9163cf6d2 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Sun, 9 Nov 2025 11:11:54 +0100
Subject: [PATCH 09/11] dpxdec: move data parsing into a separate function

---
 libavcodec/dpx.c | 275 ++++++++++++++++++++++++-----------------------
 1 file changed, 141 insertions(+), 134 deletions(-)

diff --git a/libavcodec/dpx.c b/libavcodec/dpx.c
index c8981bbf3a..96d4647389 100644
--- a/libavcodec/dpx.c
+++ b/libavcodec/dpx.c
@@ -118,6 +118,145 @@ static uint16_t read12in32(const uint8_t **ptr, uint32_t 
*lbuf,
     }
 }
 
+static void unpack_frame(AVCodecContext *avctx, AVFrame *p, const uint8_t *buf,
+                         int elements, int endian)
+{ /* Unpack the DPX sample data at buf into the data planes of p, dispatching on avctx->bits_per_raw_sample. */
+    int i, x, y;
+    DPXDecContext *dpx = avctx->priv_data;
+
+    uint8_t *ptr[AV_NUM_DATA_POINTERS]; /* per-plane write cursors, seeded from p->data[] below */
+    unsigned int rgbBuffer = 0; /* handed by pointer to the read10in32()/read12in32() helpers */
+    int n_datum = 0; /* likewise handed to the helpers; reset at row ends as noted below */
+
+    for (i=0; i<AV_NUM_DATA_POINTERS; i++)
+        ptr[i] = p->data[i];
+
+    switch (avctx->bits_per_raw_sample) {
+    case 10:
+        for (x = 0; x < avctx->height; x++) { /* NB: in this case x walks rows and y walks columns */
+            uint16_t *dst[4] = {(uint16_t*)ptr[0],
+                                (uint16_t*)ptr[1],
+                                (uint16_t*)ptr[2],
+                                (uint16_t*)ptr[3]};
+            int shift = elements > 1 ? dpx->packing == 1 ? 22 : 20 : 
dpx->packing == 1 ? 2 : 0;
+            for (y = 0; y < avctx->width; y++) {
+                if (elements >= 3) /* samples are read in stored order and land in planes 2, 0, 1, 3 */
+                    *dst[2]++ = read10in32(&buf, &rgbBuffer,
+                                           &n_datum, endian, shift);
+                if (elements == 1) /* single-component data uses the dedicated gray reader */
+                    *dst[0]++ = read10in32_gray(&buf, &rgbBuffer,
+                                                &n_datum, endian, shift);
+                else
+                    *dst[0]++ = read10in32(&buf, &rgbBuffer,
+                                           &n_datum, endian, shift);
+                if (elements >= 2)
+                    *dst[1]++ = read10in32(&buf, &rgbBuffer,
+                                           &n_datum, endian, shift);
+                if (elements == 4)
+                    *dst[3]++ =
+                    read10in32(&buf, &rgbBuffer,
+                               &n_datum, endian, shift);
+            }
+            if (!dpx->unpadded_10bit) /* the datum counter restarts on every row unless the file is flagged unpadded */
+                n_datum = 0;
+            for (i = 0; i < elements; i++)
+                ptr[i] += p->linesize[i];
+        }
+        break;
+    case 12:
+        for (x = 0; x < avctx->height; x++) { /* x walks rows, y walks columns, as in the 10-bit case */
+            uint16_t *dst[4] = {(uint16_t*)ptr[0],
+                                (uint16_t*)ptr[1],
+                                (uint16_t*)ptr[2],
+                                (uint16_t*)ptr[3]};
+            int shift = dpx->packing == 1 ? 4 : 0;
+            for (y = 0; y < avctx->width; y++) {
+                if (dpx->packing) { /* one sample per 16-bit word: shift, then mask down to 12 bits */
+                    if (elements >= 3)
+                        *dst[2]++ = read16(&buf, endian) >> shift & 0xFFF;
+                    *dst[0]++ = read16(&buf, endian) >> shift & 0xFFF;
+                    if (elements >= 2)
+                        *dst[1]++ = read16(&buf, endian) >> shift & 0xFFF;
+                    if (elements == 4)
+                        *dst[3]++ = read16(&buf, endian) >> shift & 0xFFF;
+                } else { /* non-packed layout: samples come from the read12in32() helper */
+                    if (elements >= 3)
+                        *dst[2]++ = read12in32(&buf, &rgbBuffer,
+                                               &n_datum, endian);
+                    *dst[0]++ = read12in32(&buf, &rgbBuffer,
+                                           &n_datum, endian);
+                    if (elements >= 2)
+                        *dst[1]++ = read12in32(&buf, &rgbBuffer,
+                                               &n_datum, endian);
+                    if (elements == 4)
+                        *dst[3]++ = read12in32(&buf, &rgbBuffer,
+                                               &n_datum, endian);
+                }
+            }
+            n_datum = 0; /* unconditional here, unlike the 10-bit case */
+            for (i = 0; i < elements; i++)
+                ptr[i] += p->linesize[i];
+            // Jump to next aligned position
+            buf += dpx->need_align;
+        }
+        break;
+    case 32:
+        if (elements == 1) { /* single component: plain row-by-row copy into plane 0 */
+            av_image_copy_plane(ptr[0], p->linesize[0],
+                                buf, dpx->stride,
+                                elements * avctx->width * 4, avctx->height);
+        } else { /* interleaved 32-bit components are scattered to planes 2, 0, 1 (and 3 for GBRAPF32) */
+            for (y = 0; y < avctx->height; y++) { /* NB: in this branch y walks rows and x walks columns */
+                ptr[0] = p->data[0] + y * p->linesize[0];
+                ptr[1] = p->data[1] + y * p->linesize[1];
+                ptr[2] = p->data[2] + y * p->linesize[2];
+                ptr[3] = p->data[3] + y * p->linesize[3];
+                for (x = 0; x < avctx->width; x++) {
+                    AV_WN32(ptr[2], AV_RN32(buf)); /* native-order 32-bit copies: no byte swapping happens here */
+                    AV_WN32(ptr[0], AV_RN32(buf + 4));
+                    AV_WN32(ptr[1], AV_RN32(buf + 8));
+                    if (avctx->pix_fmt == AV_PIX_FMT_GBRAPF32BE ||
+                        avctx->pix_fmt == AV_PIX_FMT_GBRAPF32LE) {
+                        AV_WN32(ptr[3], AV_RN32(buf + 12));
+                        buf += 4; /* accounts for the fourth component; the common 12 bytes are added below */
+                        ptr[3] += 4;
+                    }
+
+                    buf += 12;
+                    ptr[2] += 4;
+                    ptr[0] += 4;
+                    ptr[1] += 4;
+                }
+            }
+        }
+        break;
+    case 16:
+        elements *= 2; /* two bytes per component; there is no break, so control continues into case 8 */
+    case 8:
+        if (   avctx->pix_fmt == AV_PIX_FMT_YUVA444P
+            || avctx->pix_fmt == AV_PIX_FMT_YUV444P) {
+            for (x = 0; x < avctx->height; x++) { /* x walks rows, y walks columns */
+                ptr[0] = p->data[0] + x * p->linesize[0];
+                ptr[1] = p->data[1] + x * p->linesize[1];
+                ptr[2] = p->data[2] + x * p->linesize[2];
+                ptr[3] = p->data[3] + x * p->linesize[3];
+                for (y = 0; y < avctx->width; y++) {
+                    *ptr[1]++ = *buf++; /* interleaved bytes go to planes 1, 0, 2 (then 3 for YUVA444P) */
+                    *ptr[0]++ = *buf++;
+                    *ptr[2]++ = *buf++;
+                    if (avctx->pix_fmt == AV_PIX_FMT_YUVA444P)
+                        *ptr[3]++ = *buf++;
+                }
+            }
+        } else { /* every other format: copy elements * width bytes per row straight into plane 0 */
+        av_image_copy_plane(ptr[0], p->linesize[0],
+                            buf, dpx->stride,
+                            elements * avctx->width, avctx->height);
+        }
+        break;
+    }
+}
+
 static int decode_frame(AVCodecContext *avctx, AVFrame *p,
                         int *got_frame, AVPacket *avpkt)
 {
@@ -125,21 +264,17 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
 
     const uint8_t *buf = avpkt->data;
     int buf_size       = avpkt->size;
-    uint8_t *ptr[AV_NUM_DATA_POINTERS];
     uint32_t header_version, version = 0;
     char creator[101] = { 0 };
     char input_device[33] = { 0 };
 
     unsigned int offset;
     int magic_num;
-    int x, y, i, j, ret;
+    int i, j, ret;
     int w, h, descriptor;
     int yuv, color_trc, color_spec;
     int encoding;
 
-    unsigned int rgbBuffer = 0;
-    int n_datum = 0;
-
     if (avpkt->size <= 1634) {
         av_log(avctx, AV_LOG_ERROR, "Packet too small for DPX header\n");
         return AVERROR_INVALIDDATA;
@@ -602,135 +737,7 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
     buf =  avpkt->data + offset;
     dpx->frame = p;
 
-    int elements = dpx->components;
-    int endian = dpx->endian;
-    for (i=0; i<AV_NUM_DATA_POINTERS; i++)
-        ptr[i] = p->data[i];
-
-    switch (avctx->bits_per_raw_sample) {
-    case 10:
-        for (x = 0; x < avctx->height; x++) {
-            uint16_t *dst[4] = {(uint16_t*)ptr[0],
-                                (uint16_t*)ptr[1],
-                                (uint16_t*)ptr[2],
-                                (uint16_t*)ptr[3]};
-            int shift = elements > 1 ? dpx->packing == 1 ? 22 : 20 : 
dpx->packing == 1 ? 2 : 0;
-            for (y = 0; y < avctx->width; y++) {
-                if (elements >= 3)
-                    *dst[2]++ = read10in32(&buf, &rgbBuffer,
-                                           &n_datum, endian, shift);
-                if (elements == 1)
-                    *dst[0]++ = read10in32_gray(&buf, &rgbBuffer,
-                                                &n_datum, endian, shift);
-                else
-                    *dst[0]++ = read10in32(&buf, &rgbBuffer,
-                                           &n_datum, endian, shift);
-                if (elements >= 2)
-                    *dst[1]++ = read10in32(&buf, &rgbBuffer,
-                                           &n_datum, endian, shift);
-                if (elements == 4)
-                    *dst[3]++ =
-                    read10in32(&buf, &rgbBuffer,
-                               &n_datum, endian, shift);
-            }
-            if (!dpx->unpadded_10bit)
-                n_datum = 0;
-            for (i = 0; i < elements; i++)
-                ptr[i] += p->linesize[i];
-        }
-        break;
-    case 12:
-        for (x = 0; x < avctx->height; x++) {
-            uint16_t *dst[4] = {(uint16_t*)ptr[0],
-                                (uint16_t*)ptr[1],
-                                (uint16_t*)ptr[2],
-                                (uint16_t*)ptr[3]};
-            int shift = dpx->packing == 1 ? 4 : 0;
-            for (y = 0; y < avctx->width; y++) {
-                if (dpx->packing) {
-                    if (elements >= 3)
-                        *dst[2]++ = read16(&buf, endian) >> shift & 0xFFF;
-                    *dst[0]++ = read16(&buf, endian) >> shift & 0xFFF;
-                    if (elements >= 2)
-                        *dst[1]++ = read16(&buf, endian) >> shift & 0xFFF;
-                    if (elements == 4)
-                        *dst[3]++ = read16(&buf, endian) >> shift & 0xFFF;
-                } else {
-                    if (elements >= 3)
-                        *dst[2]++ = read12in32(&buf, &rgbBuffer,
-                                               &n_datum, endian);
-                    *dst[0]++ = read12in32(&buf, &rgbBuffer,
-                                           &n_datum, endian);
-                    if (elements >= 2)
-                        *dst[1]++ = read12in32(&buf, &rgbBuffer,
-                                               &n_datum, endian);
-                    if (elements == 4)
-                        *dst[3]++ = read12in32(&buf, &rgbBuffer,
-                                               &n_datum, endian);
-                }
-            }
-            n_datum = 0;
-            for (i = 0; i < elements; i++)
-                ptr[i] += p->linesize[i];
-            // Jump to next aligned position
-            buf += dpx->need_align;
-        }
-        break;
-    case 32:
-        if (elements == 1) {
-            av_image_copy_plane(ptr[0], p->linesize[0],
-                                buf, dpx->stride,
-                                elements * avctx->width * 4, avctx->height);
-        } else {
-            for (y = 0; y < avctx->height; y++) {
-                ptr[0] = p->data[0] + y * p->linesize[0];
-                ptr[1] = p->data[1] + y * p->linesize[1];
-                ptr[2] = p->data[2] + y * p->linesize[2];
-                ptr[3] = p->data[3] + y * p->linesize[3];
-                for (x = 0; x < avctx->width; x++) {
-                    AV_WN32(ptr[2], AV_RN32(buf));
-                    AV_WN32(ptr[0], AV_RN32(buf + 4));
-                    AV_WN32(ptr[1], AV_RN32(buf + 8));
-                    if (avctx->pix_fmt == AV_PIX_FMT_GBRAPF32BE ||
-                        avctx->pix_fmt == AV_PIX_FMT_GBRAPF32LE) {
-                        AV_WN32(ptr[3], AV_RN32(buf + 12));
-                        buf += 4;
-                        ptr[3] += 4;
-                    }
-
-                    buf += 12;
-                    ptr[2] += 4;
-                    ptr[0] += 4;
-                    ptr[1] += 4;
-                }
-            }
-        }
-        break;
-    case 16:
-        elements *= 2;
-    case 8:
-        if (   avctx->pix_fmt == AV_PIX_FMT_YUVA444P
-            || avctx->pix_fmt == AV_PIX_FMT_YUV444P) {
-            for (x = 0; x < avctx->height; x++) {
-                ptr[0] = p->data[0] + x * p->linesize[0];
-                ptr[1] = p->data[1] + x * p->linesize[1];
-                ptr[2] = p->data[2] + x * p->linesize[2];
-                ptr[3] = p->data[3] + x * p->linesize[3];
-                for (y = 0; y < avctx->width; y++) {
-                    *ptr[1]++ = *buf++;
-                    *ptr[0]++ = *buf++;
-                    *ptr[2]++ = *buf++;
-                    if (avctx->pix_fmt == AV_PIX_FMT_YUVA444P)
-                        *ptr[3]++ = *buf++;
-                }
-            }
-        } else {
-        av_image_copy_plane(ptr[0], p->linesize[0],
-                            buf, dpx->stride,
-                            elements * avctx->width, avctx->height);
-        }
-        break;
-    }
+    unpack_frame(avctx, p, buf, dpx->components, dpx->endian);
 
     *got_frame = 1;
 
-- 
2.49.1


>From 478f3181c1053a53dc74d90bca8a0b828d70bd2e Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Wed, 29 Oct 2025 14:24:10 +0100
Subject: [PATCH 10/11] dpxdec: add hardware decoding hooks

---
 libavcodec/dpx.c | 142 ++++++++++++++++++++++++++++++++++++++---------
 libavcodec/dpx.h |   3 +
 2 files changed, 118 insertions(+), 27 deletions(-)

diff --git a/libavcodec/dpx.c b/libavcodec/dpx.c
index 96d4647389..47efcb7572 100644
--- a/libavcodec/dpx.c
+++ b/libavcodec/dpx.c
@@ -29,6 +29,11 @@
 #include "decode.h"
 #include "dpx.h"
 
+#include "thread.h"
+#include "hwconfig.h"
+#include "hwaccel_internal.h"
+#include "config_components.h"
+
 static unsigned int read16(const uint8_t **ptr, int is_big)
 {
     unsigned int temp;
@@ -257,11 +262,26 @@ static void unpack_frame(AVCodecContext *avctx, AVFrame 
*p, const uint8_t *buf,
     }
 }
 
+static enum AVPixelFormat get_pixel_format(AVCodecContext *avctx,
+                                           enum AVPixelFormat pix_fmt)
+{ /* Build the list of candidate output formats and return whatever ff_get_format() picks from it. */
+    enum AVPixelFormat pix_fmts[] = {
+#if CONFIG_DPX_VULKAN_HWACCEL
+        AV_PIX_FMT_VULKAN, /* listed first, and only when the Vulkan hwaccel is compiled in */
+#endif
+        pix_fmt, /* the software format supplied by the caller */
+        AV_PIX_FMT_NONE, /* terminates the list */
+    };
+
+    return ff_get_format(avctx, pix_fmts); /* result is checked for < 0 by the caller */
+}
+
 static int decode_frame(AVCodecContext *avctx, AVFrame *p,
                         int *got_frame, AVPacket *avpkt)
 {
     DPXDecContext *dpx = avctx->priv_data;
 
+    enum AVPixelFormat pix_fmt;
     const uint8_t *buf = avpkt->data;
     int buf_size       = avpkt->size;
     uint32_t header_version, version = 0;
@@ -631,96 +651,96 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
     case 4080:
     case 6081:
     case 6080:
-        avctx->pix_fmt = AV_PIX_FMT_GRAY8;
+        pix_fmt = AV_PIX_FMT_GRAY8;
         break;
     case 6121:
     case 6120:
-        avctx->pix_fmt = AV_PIX_FMT_GRAY12;
+        pix_fmt = AV_PIX_FMT_GRAY12;
         break;
     case 1320:
     case 2320:
     case 3320:
     case 4320:
     case 6320:
-        avctx->pix_fmt = AV_PIX_FMT_GRAYF32LE;
+        pix_fmt = AV_PIX_FMT_GRAYF32LE;
         break;
     case 1321:
     case 2321:
     case 3321:
     case 4321:
     case 6321:
-        avctx->pix_fmt = AV_PIX_FMT_GRAYF32BE;
+        pix_fmt = AV_PIX_FMT_GRAYF32BE;
         break;
     case 50081:
     case 50080:
-        avctx->pix_fmt = AV_PIX_FMT_RGB24;
+        pix_fmt = AV_PIX_FMT_RGB24;
         break;
     case 52081:
     case 52080:
-        avctx->pix_fmt = AV_PIX_FMT_ABGR;
+        pix_fmt = AV_PIX_FMT_ABGR;
         break;
     case 51081:
     case 51080:
-        avctx->pix_fmt = AV_PIX_FMT_RGBA;
+        pix_fmt = AV_PIX_FMT_RGBA;
         break;
     case 50100:
     case 50101:
-        avctx->pix_fmt = AV_PIX_FMT_GBRP10;
+        pix_fmt = AV_PIX_FMT_GBRP10;
         break;
     case 51100:
     case 51101:
-        avctx->pix_fmt = AV_PIX_FMT_GBRAP10;
+        pix_fmt = AV_PIX_FMT_GBRAP10;
         break;
     case 50120:
     case 50121:
-        avctx->pix_fmt = AV_PIX_FMT_GBRP12;
+        pix_fmt = AV_PIX_FMT_GBRP12;
         break;
     case 51120:
     case 51121:
-        avctx->pix_fmt = AV_PIX_FMT_GBRAP12;
+        pix_fmt = AV_PIX_FMT_GBRAP12;
         break;
     case 6100:
     case 6101:
-        avctx->pix_fmt = AV_PIX_FMT_GRAY10;
+        pix_fmt = AV_PIX_FMT_GRAY10;
         break;
     case 6161:
-        avctx->pix_fmt = AV_PIX_FMT_GRAY16BE;
+        pix_fmt = AV_PIX_FMT_GRAY16BE;
         break;
     case 6160:
-        avctx->pix_fmt = AV_PIX_FMT_GRAY16LE;
+        pix_fmt = AV_PIX_FMT_GRAY16LE;
         break;
     case 50161:
-        avctx->pix_fmt = AV_PIX_FMT_RGB48BE;
+        pix_fmt = AV_PIX_FMT_RGB48BE;
         break;
     case 50160:
-        avctx->pix_fmt = AV_PIX_FMT_RGB48LE;
+        pix_fmt = AV_PIX_FMT_RGB48LE;
         break;
     case 51161:
-        avctx->pix_fmt = AV_PIX_FMT_RGBA64BE;
+        pix_fmt = AV_PIX_FMT_RGBA64BE;
         break;
     case 51160:
-        avctx->pix_fmt = AV_PIX_FMT_RGBA64LE;
+        pix_fmt = AV_PIX_FMT_RGBA64LE;
         break;
     case 50320:
-        avctx->pix_fmt = AV_PIX_FMT_GBRPF32LE;
+        pix_fmt = AV_PIX_FMT_GBRPF32LE;
         break;
     case 50321:
-        avctx->pix_fmt = AV_PIX_FMT_GBRPF32BE;
+        pix_fmt = AV_PIX_FMT_GBRPF32BE;
         break;
     case 51320:
-        avctx->pix_fmt = AV_PIX_FMT_GBRAPF32LE;
+        pix_fmt = AV_PIX_FMT_GBRAPF32LE;
         break;
     case 51321:
-        avctx->pix_fmt = AV_PIX_FMT_GBRAPF32BE;
+        pix_fmt = AV_PIX_FMT_GBRAPF32BE;
         break;
     case 100081:
-        avctx->pix_fmt = AV_PIX_FMT_UYVY422;
+        pix_fmt = AV_PIX_FMT_UYVY422;
         break;
     case 102081:
-        avctx->pix_fmt = AV_PIX_FMT_YUV444P;
+        pix_fmt = AV_PIX_FMT_YUV444P;
         break;
     case 103081:
-        avctx->pix_fmt = AV_PIX_FMT_YUVA444P;
+        pix_fmt = AV_PIX_FMT_YUVA444P;
         break;
     default:
         av_log(avctx, AV_LOG_ERROR, "Unsupported format %d\n",
@@ -728,6 +748,16 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
         return AVERROR_PATCHWELCOME;
     }
 
+    if (pix_fmt != dpx->pix_fmt) {
+        dpx->pix_fmt = pix_fmt;
+
+        ret = get_pixel_format(avctx, pix_fmt);
+        if (ret < 0)
+            return ret;
+
+        avctx->pix_fmt = ret;
+    }
+
     ff_set_sar(avctx, avctx->sample_aspect_ratio);
 
     if ((ret = ff_get_buffer(avctx, p, 0)) < 0)
@@ -737,13 +767,65 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p,
     buf =  avpkt->data + offset;
     dpx->frame = p;
 
-    unpack_frame(avctx, p, buf, dpx->components, dpx->endian);
+    /* Start */
+    if (avctx->hwaccel) {
+        const FFHWAccel *hwaccel = ffhwaccel(avctx->hwaccel);
+
+        ret = ff_hwaccel_frame_priv_alloc(avctx, 
&dpx->hwaccel_picture_private);
+        if (ret < 0)
+            return ret;
+
+        ret = hwaccel->start_frame(avctx, avpkt->buf, buf, avpkt->size - 
offset);
+        if (ret < 0)
+            return ret;
+
+        ret = hwaccel->decode_slice(avctx, buf, avpkt->size - offset);
+        if (ret < 0)
+            return ret;
+
+        ret = hwaccel->end_frame(avctx);
+        if (ret < 0)
+            return ret;
+
+        av_refstruct_unref(&dpx->hwaccel_picture_private);
+    } else {
+        unpack_frame(avctx, p, buf, dpx->components, dpx->endian);
+    }
+
+    p->pict_type = AV_PICTURE_TYPE_I;
+    p->flags    |= AV_FRAME_FLAG_KEY;
 
     *got_frame = 1;
 
     return buf_size;
 }
 
+#if HAVE_THREADS
+static int update_thread_context(AVCodecContext *dst, const AVCodecContext 
*src)
+{ /* Registered via UPDATE_THREAD_CONTEXT: copies the cached pix_fmt from src's private context into dst's. */
+    DPXDecContext *ssrc = src->priv_data;
+    DPXDecContext *sdst = dst->priv_data;
+
+    sdst->pix_fmt = ssrc->pix_fmt; /* the only field carried over; decode_frame compares it with the header-derived format */
+
+    return 0; /* always reports success */
+}
+#endif
+
+static av_cold int decode_end(AVCodecContext *avctx)
+{ /* Codec close callback: releases any hwaccel per-picture data still referenced by the context. */
+    DPXDecContext *dpx = avctx->priv_data;
+    av_refstruct_unref(&dpx->hwaccel_picture_private); /* may still be set if decode_frame returned early on a hwaccel error */
+    return 0;
+}
+
+static av_cold int decode_init(AVCodecContext *avctx)
+{ /* Codec init callback: marks the cached pixel format as not yet negotiated. */
+    DPXDecContext *dpx = avctx->priv_data;
+    dpx->pix_fmt = AV_PIX_FMT_NONE; /* differs from any format decode_frame derives, so the first frame calls get_pixel_format() */
+    return 0;
+}
+
 const FFCodec ff_dpx_decoder = {
     .p.name         = "dpx",
     CODEC_LONG_NAME("DPX (Digital Picture Exchange) image"),
@@ -751,5 +833,11 @@ const FFCodec ff_dpx_decoder = {
     .p.type         = AVMEDIA_TYPE_VIDEO,
     .p.id           = AV_CODEC_ID_DPX,
     FF_CODEC_DECODE_CB(decode_frame),
-    .p.capabilities = AV_CODEC_CAP_DR1,
+    .init           = decode_init,
+    .close          = decode_end,
+    UPDATE_THREAD_CONTEXT(update_thread_context),
+    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
+    .hw_configs     = (const AVCodecHWConfigInternal *const []) {
+        NULL
+    },
 };
diff --git a/libavcodec/dpx.h b/libavcodec/dpx.h
index 35e8aa690f..c9d95af1f1 100644
--- a/libavcodec/dpx.h
+++ b/libavcodec/dpx.h
@@ -23,6 +23,7 @@
 #define AVCODEC_DPX_H
 
 #include "libavutil/frame.h"
+#include "libavutil/pixfmt.h"
 
 enum DPX_TRC {
     DPX_TRC_USER_DEFINED       = 0,
@@ -58,6 +59,8 @@ enum DPX_COL_SPEC {
 
 typedef struct DPXDecContext {
     AVFrame *frame;
+    void *hwaccel_picture_private;
+    enum AVPixelFormat pix_fmt;
 
     int packing;
     int stride;
-- 
2.49.1


>From d1fc1d70c8b55410510c7711c9e2b9238430fca9 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Wed, 29 Oct 2025 15:27:47 +0100
Subject: [PATCH 11/11] dpxdec: add a Vulkan hwaccel

---
 configure                         |   2 +
 libavcodec/Makefile               |   1 +
 libavcodec/dpx.c                  |   5 +
 libavcodec/hwaccels.h             |   1 +
 libavcodec/vulkan/Makefile        |   4 +
 libavcodec/vulkan/dpx_copy.comp   |  42 +++
 libavcodec/vulkan/dpx_unpack.comp |  83 ++++++
 libavcodec/vulkan_decode.c        |  12 +
 libavcodec/vulkan_dpx.c           | 464 ++++++++++++++++++++++++++++++
 9 files changed, 614 insertions(+)
 create mode 100644 libavcodec/vulkan/dpx_copy.comp
 create mode 100644 libavcodec/vulkan/dpx_unpack.comp
 create mode 100644 libavcodec/vulkan_dpx.c

diff --git a/configure b/configure
index e2caf3b24c..b3eb22508c 100755
--- a/configure
+++ b/configure
@@ -3262,6 +3262,8 @@ av1_videotoolbox_hwaccel_deps="videotoolbox"
 av1_videotoolbox_hwaccel_select="av1_decoder"
 av1_vulkan_hwaccel_deps="vulkan"
 av1_vulkan_hwaccel_select="av1_decoder"
+dpx_vulkan_hwaccel_deps="vulkan spirv_compiler"
+dpx_vulkan_hwaccel_select="dpx_decoder"
 ffv1_vulkan_hwaccel_deps="vulkan spirv_compiler"
 ffv1_vulkan_hwaccel_select="ffv1_decoder"
 h263_vaapi_hwaccel_deps="vaapi"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 0cd2408865..50bb6cc356 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1049,6 +1049,7 @@ OBJS-$(CONFIG_AV1_VAAPI_HWACCEL)          += vaapi_av1.o
 OBJS-$(CONFIG_AV1_VDPAU_HWACCEL)          += vdpau_av1.o
 OBJS-$(CONFIG_AV1_VIDEOTOOLBOX_HWACCEL)   += videotoolbox_av1.o
 OBJS-$(CONFIG_AV1_VULKAN_HWACCEL)         += vulkan_decode.o vulkan_av1.o
+OBJS-$(CONFIG_DPX_VULKAN_HWACCEL)         += vulkan_decode.o vulkan_dpx.o
 OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL)        += vulkan_decode.o ffv1_vulkan.o 
vulkan_ffv1.o
 OBJS-$(CONFIG_H263_VAAPI_HWACCEL)         += vaapi_mpeg4.o
 OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL)  += videotoolbox.o
diff --git a/libavcodec/dpx.c b/libavcodec/dpx.c
index 47efcb7572..7355b50f7a 100644
--- a/libavcodec/dpx.c
+++ b/libavcodec/dpx.c
@@ -837,7 +837,12 @@ const FFCodec ff_dpx_decoder = {
     .close          = decode_end,
     UPDATE_THREAD_CONTEXT(update_thread_context),
     .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
+    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP |
+                      FF_CODEC_CAP_SKIP_FRAME_FILL_PARAM,
     .hw_configs     = (const AVCodecHWConfigInternal *const []) {
+#if CONFIG_DPX_VULKAN_HWACCEL
+        HWACCEL_VULKAN(dpx),
+#endif
         NULL
     },
 };
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index 638a7bfb1d..3de191288a 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -28,6 +28,7 @@ extern const struct FFHWAccel ff_av1_vaapi_hwaccel;
 extern const struct FFHWAccel ff_av1_vdpau_hwaccel;
 extern const struct FFHWAccel ff_av1_videotoolbox_hwaccel;
 extern const struct FFHWAccel ff_av1_vulkan_hwaccel;
+extern const struct FFHWAccel ff_dpx_vulkan_hwaccel;
 extern const struct FFHWAccel ff_ffv1_vulkan_hwaccel;
 extern const struct FFHWAccel ff_h263_vaapi_hwaccel;
 extern const struct FFHWAccel ff_h263_videotoolbox_hwaccel;
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index ec3015fee6..bf206488d5 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -22,6 +22,10 @@ OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/common.o \
                                         vulkan/prores_vld.o \
                                         vulkan/prores_idct.o
 
+OBJS-$(CONFIG_DPX_VULKAN_HWACCEL) += vulkan/common.o \
+                                     vulkan/dpx_unpack.o \
+                                     vulkan/dpx_copy.o
+
 VULKAN = $(subst $(SRC_PATH)/,,$(wildcard 
$(SRC_PATH)/libavcodec/vulkan/*.comp))
 .SECONDARY: $(VULKAN:.comp=.c)
 libavcodec/vulkan/%.c: TAG = VULKAN
diff --git a/libavcodec/vulkan/dpx_copy.comp b/libavcodec/vulkan/dpx_copy.comp
new file mode 100644
index 0000000000..948cb32c5f
--- /dev/null
+++ b/libavcodec/vulkan/dpx_copy.comp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2025 Lynne <[email protected]>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Returns element `off` of the input buffer, passed through TYPE_REVERSE
+ * when BIG_ENDIAN is defined. */
+TYPE read_data(uint off)
+{
+    TYPE raw = data[off];
+#ifdef BIG_ENDIAN
+    raw = TYPE_REVERSE(raw);
+#endif
+    return raw;
+}
+
+/* One invocation per output pixel: reads COMPONENTS consecutive elements
+ * from the input buffer, shifts each right by SHIFT and stores it into the
+ * image selected by fmt_lut. */
+void main(void)
+{
+    ivec2 size = imageSize(dst[0]);
+    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
+    if (!IS_WITHIN(pos, size))
+        return;
+
+    /* Maps the component index in the input to the index of the image
+     * it is written to */
+    const ivec4 fmt_lut = COMPONENTS == 1 ? ivec4(0) : ivec4(2, 0, 1, 3);
+
+    /* Index of the first component of this pixel in the input buffer */
+    int base = (pos.y*size.x + pos.x)*COMPONENTS;
+
+    for (int c = 0; c < COMPONENTS; c++) {
+        TYPE val = read_data(base + c);
+        val >>= SHIFT;
+        imageStore(dst[fmt_lut[c]], pos, TYPE_VEC(val));
+    }
+}
diff --git a/libavcodec/vulkan/dpx_unpack.comp b/libavcodec/vulkan/dpx_unpack.comp
new file mode 100644
index 0000000000..516b0cb05b
--- /dev/null
+++ b/libavcodec/vulkan/dpx_unpack.comp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2025 Lynne <[email protected]>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Returns 32-bit word `off` of the input buffer, passed through reverse4()
+ * when BIG_ENDIAN is defined. */
+uint32_t read_data(uint off)
+{
+    uint32_t word = data[off];
+#ifdef BIG_ENDIAN
+    word = reverse4(word);
+#endif
+    return word;
+}
+
+#ifdef PACKED_10BIT
+/* Reads the 32-bit word of the pixel at `pos` (one word per pixel, `stride`
+ * words per line) and extracts its three 10-bit fields: bits 31..22,
+ * 21..12 and 11..2, in this order. The fourth element is always zero. */
+i16vec4 parse_packed_in_32(ivec2 pos, int stride)
+{
+    uint32_t word = read_data(pos.y*stride + pos.x);
+
+    return i16vec4(int16_t((word >> 22) & 0x3FF),
+                   int16_t((word >> 12) & 0x3FF),
+                   int16_t((word >>  2) & 0x3FF),
+                   int16_t(0));
+}
+#else
+/* Extracts up to four BITS_PER_COMP-wide components of the pixel at `pos`
+ * from a tightly packed bitstream read as 32-bit words.
+ *
+ * `stride` is the number of pixels per line; need_align (push constant) is
+ * added to each line's length after being converted from bytes to bits. */
+i16vec4 parse_packed_in_32(ivec2 pos, int stride)
+{
+    /* All offsets here are in bits */
+    uint line_off = pos.y*(stride*BITS_PER_COMP*COMPONENTS +
+                           (need_align << 3));
+    uint pix_off = pos.x*BITS_PER_COMP*COMPONENTS;
+    uint bit_off = line_off + pix_off;
+
+    /* Both the word index and the bit position inside that word must be
+     * derived from the full offset: a line does not have to start on a
+     * 32-bit boundary. */
+    uint off = bit_off >> 5;
+    uint bit = bit_off & 0x1f;
+
+    /* The pixel may straddle two words, so fetch both.
+     * NOTE(review): for the last pixel this reads one word past the one
+     * holding its first bit - confirm the buffer is large enough. */
+    uint32_t d0 = read_data(off + 0);
+    uint32_t d1 = read_data(off + 1);
+
+    uint64_t combined = (uint64_t(d1) << 32) | d0;
+    combined >>= bit;
+
+    return i16vec4(combined,
+                   combined >> (BITS_PER_COMP*1),
+                   combined >> (BITS_PER_COMP*2),
+                   combined >> (BITS_PER_COMP*3)) &
+           int16_t((1 << BITS_PER_COMP) - 1);
+}
+#endif
+
+/* One invocation per output pixel: unpacks the pixel and writes it either
+ * as a whole into dst[0] (10-bit, 3 components) or one component per image. */
+void main(void)
+{
+    ivec2 size = imageSize(dst[0]);
+    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
+    if (!IS_WITHIN(pos, size))
+        return;
+
+    i16vec4 px = parse_packed_in_32(pos, size.x);
+
+#if BITS_PER_COMP == 10 && COMPONENTS == 3
+    imageStore(dst[0], pos, px);
+#else
+    /* Maps the component index in the input to the index of the image
+     * it is written to */
+    const ivec4 fmt_lut = COMPONENTS == 1 ? ivec4(0) : ivec4(2, 0, 1, 3);
+    for (uint c = 0; c < COMPONENTS; c++) {
+        imageStore(dst[fmt_lut[c]], pos, i16vec4(px[c]));
+    }
+#endif
+}
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index d22ccc21aa..ca9a9042ad 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -26,6 +26,7 @@
 
 #define DECODER_IS_SDR(codec_id) \
     (((codec_id) == AV_CODEC_ID_FFV1) || \
+     ((codec_id) == AV_CODEC_ID_DPX) || \
      ((codec_id) == AV_CODEC_ID_PRORES_RAW) || \
      ((codec_id) == AV_CODEC_ID_PRORES))
 
@@ -50,6 +51,9 @@ extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_raw_desc;
 #if CONFIG_PRORES_VULKAN_HWACCEL
 extern const FFVulkanDecodeDescriptor ff_vk_dec_prores_desc;
 #endif
+#if CONFIG_DPX_VULKAN_HWACCEL
+extern const FFVulkanDecodeDescriptor ff_vk_dec_dpx_desc;
+#endif
 
 static const FFVulkanDecodeDescriptor *dec_descs[] = {
 #if CONFIG_H264_VULKAN_HWACCEL
@@ -73,6 +77,9 @@ static const FFVulkanDecodeDescriptor *dec_descs[] = {
 #if CONFIG_PRORES_VULKAN_HWACCEL
     &ff_vk_dec_prores_desc,
 #endif
+#if CONFIG_DPX_VULKAN_HWACCEL
+    &ff_vk_dec_dpx_desc,
+#endif
 };
 
 typedef struct FFVulkanDecodeProfileData {
@@ -1117,6 +1124,11 @@ int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
             /* This should be more efficient for downloading and using */
             frames_ctx->sw_format = AV_PIX_FMT_RGBA64;
             break;
+        case AV_PIX_FMT_RGB48LE:
+        case AV_PIX_FMT_RGB48BE: /* DPX outputs RGB48BE, so we need both */
+            /* Almost nothing supports native 3-component RGB */
+            frames_ctx->sw_format = AV_PIX_FMT_GBRP16;
+            break;
         case AV_PIX_FMT_GBRP10:
             /* This saves memory bandwidth when downloading */
             frames_ctx->sw_format = AV_PIX_FMT_X2BGR10;
diff --git a/libavcodec/vulkan_dpx.c b/libavcodec/vulkan_dpx.c
new file mode 100644
index 0000000000..4cc8451d82
--- /dev/null
+++ b/libavcodec/vulkan_dpx.c
@@ -0,0 +1,464 @@
+/*
+ * Copyright (c) 2025 Lynne <[email protected]>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vulkan_decode.h"
+#include "hwaccel_internal.h"
+
+#include "dpx.h"
+#include "libavutil/vulkan_spirv.h"
+#include "libavutil/mem.h"
+
+extern const char *ff_source_common_comp;
+extern const char *ff_source_dpx_unpack_comp;
+extern const char *ff_source_dpx_copy_comp;
+
+/* Decode descriptor for DPX: declares FF_VK_EXT_PUSH_DESCRIPTOR as its
+ * decode extension and asks for a queue with compute capability. */
+const FFVulkanDecodeDescriptor ff_vk_dec_dpx_desc = {
+    .codec_id         = AV_CODEC_ID_DPX,
+    .decode_extension = FF_VK_EXT_PUSH_DESCRIPTOR,
+    .queue_flags      = VK_QUEUE_COMPUTE_BIT,
+};
+
+/* Per-frame hwaccel private data; only wraps the generic Vulkan decode
+ * picture state. */
+typedef struct DPXVulkanDecodePicture {
+    FFVulkanDecodePicture vp;
+} DPXVulkanDecodePicture;
+
+/* Decoder-wide state, stored in the shared decode context's sd_ctx. */
+typedef struct DPXVulkanDecodeContext {
+    /* Compute shader which writes the frame data into the output images */
+    FFVulkanShader shader;
+    /* Pool the frame data buffers are taken from */
+    AVBufferPool *frame_data_pool;
+} DPXVulkanDecodeContext;
+
+/* Push constants handed to the shader. The members and their order must be
+ * kept in sync with the pushConstants block emitted into the GLSL source. */
+typedef struct DecodePushData {
+    int stride;
+    int need_align;
+    int padded_10bit;
+} DecodePushData;
+
+/**
+ * Tries to place the frame data into a device-local buffer with a host
+ * image copy: a temporary linear image is bound to the memory of a pooled
+ * buffer, the data is copied into that image from the host, and the image
+ * is destroyed again. The data is left in vp->slices_buf.
+ *
+ * @param src  start of the frame data
+ * @param size size of the frame data, in bytes
+ * @return 0 if the data was uploaded or if the upload was skipped,
+ *         a negative error code on failure. Whenever no upload took place,
+ *         vp->slices_buf is left unset so that the caller can fall back to
+ *         another way of providing the data.
+ */
+static int host_upload_image(AVCodecContext *avctx,
+                             FFVulkanDecodeContext *dec, DPXDecContext *dpx,
+                             const uint8_t *src, uint32_t size)
+{
+    int err;
+    VkResult ret;
+    VkImage temp;
+
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    DPXVulkanDecodeContext *dxv = ctx->sd_ctx;
+    VkPhysicalDeviceLimits *limits = &ctx->s.props.properties.limits;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    DPXVulkanDecodePicture *pp = dpx->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &pp->vp;
+
+    /* These layouts have no matching R8/R16/R32 image format.
+     * NOTE(review): the shader selection treats 12-bit data with
+     * dpx->packing *set* as the one needing unpacking, whereas this skips
+     * 12-bit data with dpx->packing *unset* - confirm which one is meant. */
+    if ((avctx->bits_per_raw_sample == 10) ||
+        (avctx->bits_per_raw_sample == 12 && !dpx->packing))
+        return 0;
+
+    /* Each component is a separate texel, so the image is
+     * width*components texels wide */
+    VkImageCreateInfo create_info = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+        .imageType = VK_IMAGE_TYPE_2D,
+        .format = avctx->bits_per_raw_sample == 8 ? VK_FORMAT_R8_UINT :
+                  avctx->bits_per_raw_sample == 32 ? VK_FORMAT_R32_UINT :
+                                                     VK_FORMAT_R16_UINT,
+        .extent.width = dpx->frame->width*dpx->components,
+        .extent.height = dpx->frame->height,
+        .extent.depth = 1,
+        .mipLevels = 1,
+        .arrayLayers = 1,
+        .tiling = VK_IMAGE_TILING_LINEAR,
+        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+        .usage = VK_IMAGE_USAGE_STORAGE_BIT |
+                 VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT,
+        .samples = VK_SAMPLE_COUNT_1_BIT,
+        .pQueueFamilyIndices = &ctx->qf[0].idx,
+        .queueFamilyIndexCount = 1,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+    };
+
+    /* maxImageDimension2D is itself a supported dimension, only larger
+     * images have to be rejected */
+    if (create_info.extent.width > limits->maxImageDimension2D ||
+        create_info.extent.height > limits->maxImageDimension2D)
+        return 0;
+
+    ret = vk->CreateImage(ctx->s.hwctx->act_dev, &create_info,
+                          ctx->s.hwctx->alloc, &temp);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_WARNING,
+               "Unable to create an image for host upload: %i\n", (int)ret);
+        return AVERROR_EXTERNAL;
+    }
+
+    err = ff_vk_get_pooled_buffer(&ctx->s, &dxv->frame_data_pool,
+                                  &vp->slices_buf,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                  NULL, size,
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+    if (err < 0)
+        goto fail;
+
+    /* NOTE(review): the buffer is allocated with the size of the packet
+     * data; confirm that this always covers the memory requirements of
+     * the linear image that gets bound to it. */
+    FFVkBuffer *vkb = (FFVkBuffer *)vp->slices_buf->data;
+    VkBindImageMemoryInfo bind_info = {
+        .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
+        .image = temp,
+        .memory = vkb->mem,
+    };
+    ret = vk->BindImageMemory2(ctx->s.hwctx->act_dev, 1, &bind_info);
+    if (ret != VK_SUCCESS)
+        goto fail;
+
+    VkHostImageLayoutTransitionInfo layout_change = {
+        .sType = VK_STRUCTURE_TYPE_HOST_IMAGE_LAYOUT_TRANSITION_INFO,
+        .image = temp,
+        .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+        .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+        .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+        .subresourceRange.layerCount = 1,
+        .subresourceRange.levelCount = 1,
+    };
+    ret = vk->TransitionImageLayoutEXT(ctx->s.hwctx->act_dev, 1,
+                                       &layout_change);
+    if (ret != VK_SUCCESS)
+        goto fail;
+
+    VkMemoryToImageCopy copy_region = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_TO_IMAGE_COPY,
+        .pHostPointer = src,
+        .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+        .imageSubresource.layerCount = 1,
+        .imageExtent = (VkExtent3D){ dpx->frame->width*dpx->components,
+                                     dpx->frame->height,
+                                     1 },
+    };
+    VkCopyMemoryToImageInfo copy_info = {
+        .sType = VK_STRUCTURE_TYPE_COPY_MEMORY_TO_IMAGE_INFO,
+        .flags = VK_HOST_IMAGE_COPY_MEMCPY_BIT_EXT,
+        .dstImage = temp,
+        .dstImageLayout = VK_IMAGE_LAYOUT_GENERAL,
+        .regionCount = 1,
+        .pRegions = &copy_region,
+    };
+    ret = vk->CopyMemoryToImageEXT(ctx->s.hwctx->act_dev, &copy_info);
+    if (ret != VK_SUCCESS)
+        goto fail;
+
+    /* The image was only a means of copying; the data stays in the buffer */
+    vk->DestroyImage(ctx->s.hwctx->act_dev, temp, ctx->s.hwctx->alloc);
+
+    return 0;
+
+fail:
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_WARNING,
+               "Host image upload failed: %i\n", (int)ret);
+        err = AVERROR_EXTERNAL;
+    }
+    /* Never leave a buffer without valid data behind: an unset slices_buf
+     * is what makes the caller use another path. */
+    av_buffer_unref(&vp->slices_buf);
+    vk->DestroyImage(ctx->s.hwctx->act_dev, temp, ctx->s.hwctx->alloc);
+
+    return err;
+}
+
+/**
+ * hwaccel start_frame callback. Tries to make the frame data available to
+ * the GPU without a staging copy (host image copy first, then host
+ * mapping), and prepares the output frame.
+ *
+ * @return 0 on success, a negative error code on failure
+ */
+static int vk_dpx_start_frame(AVCodecContext          *avctx,
+                              const AVBufferRef       *buffer_ref,
+                              av_unused const uint8_t *buffer,
+                              av_unused uint32_t       size)
+{
+    int err;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    DPXDecContext *dpx = avctx->priv_data;
+
+    DPXVulkanDecodePicture *pp = dpx->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &pp->vp;
+
+    /* Best effort: if this fails, vp->slices_buf stays unset and the data
+     * is provided through one of the other paths. */
+    if (ctx->s.extensions & FF_VK_EXT_HOST_IMAGE_COPY)
+        host_upload_image(avctx, dec, dpx, buffer, size);
+
+    /* Host map the frame data if supported. Best effort as well: if
+     * vp->slices_buf is still unset afterwards, the data is added in
+     * the decode_slice callback. */
+    if (!vp->slices_buf &&
+        ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
+        ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, (uint8_t *)buffer,
+                              buffer_ref,
+                              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                              VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);
+
+    /* Prepare frame to be used */
+    err = ff_vk_decode_prepare_frame_sdr(dec, dpx->frame, vp, 1,
+                                         FF_VK_REP_NATIVE, 0);
+    if (err < 0)
+        return err;
+
+    return 0;
+}
+
+/**
+ * hwaccel decode_slice callback. Adds the data to the picture's slice
+ * buffer, unless the picture already has a slice buffer set.
+ *
+ * @return 0 on success, a negative error code on failure
+ */
+static int vk_dpx_decode_slice(AVCodecContext *avctx,
+                               const uint8_t  *data,
+                               uint32_t        size)
+{
+    DPXDecContext *dpx = avctx->priv_data;
+    DPXVulkanDecodePicture *pic = dpx->hwaccel_picture_private;
+    int ret;
+
+    /* A buffer is already in place, there is nothing to add */
+    if (pic->vp.slices_buf)
+        return 0;
+
+    ret = ff_vk_decode_add_slice(avctx, &pic->vp, data, size, 0, NULL, NULL);
+
+    return ret < 0 ? ret : 0;
+}
+
+/**
+ * hwaccel end_frame callback. Records and submits the compute dispatch
+ * which reads the frame data from vp->slices_buf and writes the output
+ * frame.
+ *
+ * @return 0 on success, a negative error code on failure
+ */
+static int vk_dpx_end_frame(AVCodecContext *avctx)
+{
+    int err;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    DPXDecContext *dpx = avctx->priv_data;
+    DPXVulkanDecodeContext *dxv = ctx->sd_ctx;
+
+    DPXVulkanDecodePicture *pp = dpx->hwaccel_picture_private;
+    FFVulkanDecodePicture *vp = &pp->vp;
+
+    /* Taken before vp->slices_buf is handed over to the execution below */
+    FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data;
+
+    VkImageMemoryBarrier2 img_bar[8];
+    int nb_img_bar = 0;
+
+    FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
+    RET(ff_vk_exec_start(&ctx->s, exec));
+
+    /* Prepare deps */
+    RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, dpx->frame,
+                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+    RET(ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
+                                    dpx->frame));
+
+    /* The buffer reference is given to the execution, which is why the
+     * picture's own pointer is cleared afterwards */
+    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0));
+    vp->slices_buf = NULL;
+
+    /* NOTE(review): the images are written by the compute shader; confirm
+     * that VK_ACCESS_2_TRANSFER_WRITE_BIT is the intended access mask. */
+    ff_vk_frame_barrier(&ctx->s, exec, dpx->frame, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_ACCESS_2_TRANSFER_WRITE_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = nb_img_bar,
+    });
+    nb_img_bar = 0;
+
+    /* Binding 0: output images, binding 1: input data */
+    FFVulkanShader *shd = &dxv->shader;
+    ff_vk_shader_update_img_array(&ctx->s, exec, shd,
+                                  dpx->frame, vp->view.out,
+                                  0, 0,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+    ff_vk_shader_update_desc_buffer(&ctx->s, exec, shd,
+                                    0, 1, 0,
+                                    slices_buf,
+                                    0, slices_buf->size,
+                                    VK_FORMAT_UNDEFINED);
+
+    ff_vk_exec_bind_shader(&ctx->s, exec, shd);
+
+    /* Update push data */
+    DecodePushData pd = (DecodePushData) {
+        .stride = dpx->stride,
+        .need_align = dpx->need_align,
+        .padded_10bit = !dpx->unpadded_10bit,
+    };
+
+    ff_vk_shader_update_push_const(&ctx->s, exec, shd,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+
+    /* One invocation per pixel, rounded up to whole workgroups */
+    vk->CmdDispatch(exec->buf,
+                    FFALIGN(dpx->frame->width,  shd->lg_size[0])/shd->lg_size[0],
+                    FFALIGN(dpx->frame->height, shd->lg_size[1])/shd->lg_size[1],
+                    1);
+
+    RET(ff_vk_exec_submit(&ctx->s, exec));
+
+    return 0;
+
+fail:
+    /* Report the failure instead of claiming that the frame was decoded */
+    return err;
+}
+
+/**
+ * Assembles, compiles, links and registers the compute shader.
+ *
+ * The GLSL source consists of the common header, the push constant block,
+ * the descriptors (binding 0: output images, binding 1: input data),
+ * a set of stream-dependent defines, and the body of either the unpack or
+ * the copy shader.
+ *
+ * @param bits bits per component of the stream
+ * @return a non-negative value on success, a negative error code on failure
+ */
+static int init_shader(AVCodecContext *avctx, FFVulkanContext *s,
+                       FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
+                       FFVulkanShader *shd, int bits)
+{
+    int err;
+    DPXDecContext *dpx = avctx->priv_data;
+    FFVulkanDescriptorSetBinding *desc_set;
+    AVHWFramesContext *dec_frames_ctx;
+    dec_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
+
+    uint8_t *spv_data;
+    size_t spv_len;
+    void *spv_opaque = NULL;
+
+    /* Size in bits of one element read by the copy shader */
+    const int elem_bits = FFALIGN(bits, 8);
+
+    RET(ff_vk_shader_init(s, shd, "dpx",
+                          VK_SHADER_STAGE_COMPUTE_BIT,
+                          (const char *[]) { "GL_EXT_buffer_reference",
+                                             "GL_EXT_buffer_reference2" }, 2,
+                          512, 1, 1,
+                          0));
+
+    /* Common codec header */
+    GLSLD(ff_source_common_comp);
+
+    /* Must match DecodePushData */
+    GLSLC(0, layout(push_constant, scalar) uniform pushConstants {            );
+    GLSLC(1,     int stride;                                                  );
+    GLSLC(1,     int need_align;                                              );
+    GLSLC(1,     int padded_10bit;                                            );
+    GLSLC(0, };                                                               );
+    GLSLC(0,                                                                  );
+    ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData),
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+
+    /* These streams are handled by the unpack shader, which always reads
+     * the data as 32-bit words */
+    int unpack = (avctx->bits_per_raw_sample == 12 && dpx->packing) ||
+                 avctx->bits_per_raw_sample == 10;
+
+    desc_set = (FFVulkanDescriptorSetBinding []) {
+        {
+            .name       = "dst",
+            .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+            .dimensions = 2,
+            .mem_quali  = "writeonly",
+            .mem_layout = ff_vk_shader_rep_fmt(dec_frames_ctx->sw_format,
+                                               FF_VK_REP_NATIVE),
+            .elems      = av_pix_fmt_count_planes(dec_frames_ctx->sw_format),
+            .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+        },
+        {
+            .name        = "data_buf",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .mem_quali   = "readonly",
+            /* The element type has to be the one the shader reads:
+             * uint32_t when unpacking, TYPE (see below) when copying */
+            .buf_content = (unpack || bits == 32) ? "uint32_t data[];" :
+                           bits == 8              ? "uint8_t data[];"  :
+                                                    "uint16_t data[];",
+        },
+    };
+    RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0));
+
+    if (dpx->endian)
+        GLSLC(0, #define BIG_ENDIAN                                           );
+    GLSLF(0, #define COMPONENTS (%i)                         ,dpx->components);
+    GLSLF(0, #define BITS_PER_COMP (%i)                                 ,bits);
+    if (unpack) {
+        if (bits == 10)
+            GLSLC(0, #define PACKED_10BIT                                     );
+        GLSLD(ff_source_dpx_unpack_comp);
+    } else {
+        GLSLF(0, #define SHIFT (%i)                         ,elem_bits - bits);
+        GLSLF(0, #define TYPE uint%i_t                             ,elem_bits);
+        GLSLF(0, #define TYPE_VEC u%ivec4                          ,elem_bits);
+        if (elem_bits == 8) {
+            /* A single byte has no byte order to reverse */
+            GLSLC(0, #define TYPE_REVERSE(x) (x)                              );
+        } else {
+            GLSLF(0, #define TYPE_REVERSE(x) (reverse%i(x))      ,elem_bits/8);
+        }
+        GLSLD(ff_source_dpx_copy_comp);
+    }
+
+    RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
+                            &spv_opaque));
+    RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
+
+    RET(ff_vk_shader_register_exec(s, pool, shd));
+
+fail:
+    if (spv_opaque)
+        spv->free_shader(spv, &spv_opaque);
+
+    return err;
+}
+
+/* Frees the DPX-specific state held in ctx->sd_ctx: the shader, the frame
+ * data pool and the context itself. */
+static void vk_decode_dpx_uninit(FFVulkanDecodeShared *ctx)
+{
+    DPXVulkanDecodeContext *fv = ctx->sd_ctx;
+
+    ff_vk_shader_free(&ctx->s, &fv->shader);
+
+    av_buffer_pool_uninit(&fv->frame_data_pool);
+
+    /* Free through the owning pointer so that it does not dangle */
+    av_freep(&ctx->sd_ctx);
+}
+
+/**
+ * hwaccel init callback. Rejects the streams which are not handled,
+ * initializes the generic Vulkan decode state, and builds the shader.
+ *
+ * @return a non-negative value on success, a negative error code on failure
+ */
+static int vk_decode_dpx_init(AVCodecContext *avctx)
+{
+    int err;
+    DPXDecContext *dpx = avctx->priv_data;
+    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+
+    switch (dpx->pix_fmt) {
+    case AV_PIX_FMT_GRAY10:
+    case AV_PIX_FMT_GBRAP10:
+    case AV_PIX_FMT_GBRAP12:
+    case AV_PIX_FMT_UYVY422:
+    case AV_PIX_FMT_YUV444P:
+    case AV_PIX_FMT_YUVA444P:
+        return AVERROR(ENOTSUP);
+    default:
+        break;
+    }
+
+    /* NOTE(review): this rejects every stream for which unpadded_10bit is
+     * unset, for any pixel format - confirm that the condition is not
+     * inverted. */
+    if (!dpx->unpadded_10bit)
+        return AVERROR(ENOTSUP);
+
+    FFVkSPIRVCompiler *spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    /* From here on every exit has to go through fail, which releases
+     * the compiler */
+    err = ff_vk_decode_init(avctx);
+    if (err < 0)
+        goto fail;
+
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
+    DPXVulkanDecodeContext *dxv = ctx->sd_ctx = av_mallocz(sizeof(*dxv));
+    if (!dxv) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    ctx->sd_ctx_free = &vk_decode_dpx_uninit;
+
+    RET(init_shader(avctx, &ctx->s, &ctx->exec_pool,
+                    spv, &dxv->shader, avctx->bits_per_raw_sample));
+
+fail:
+    spv->uninit(&spv);
+
+    return err;
+}
+
+/* free_frame_priv callback: releases the Vulkan decode picture state
+ * embedded in the per-frame private data. */
+static void vk_dpx_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
+{
+    DPXVulkanDecodePicture *pic = data;
+    AVHWDeviceContext *hwdev = _hwctx.nc;
+
+    ff_vk_decode_free_frame(hwdev, &pic->vp);
+}
+
+/* Vulkan hwaccel for DPX. The per-frame callbacks are the DPX-specific ones
+ * defined in this file; the remaining callbacks are the shared ff_vk_*
+ * ones. */
+const FFHWAccel ff_dpx_vulkan_hwaccel = {
+    .p.name                = "dpx_vulkan",
+    .p.type                = AVMEDIA_TYPE_VIDEO,
+    .p.id                  = AV_CODEC_ID_DPX,
+    .p.pix_fmt             = AV_PIX_FMT_VULKAN,
+    .start_frame           = &vk_dpx_start_frame,
+    .decode_slice          = &vk_dpx_decode_slice,
+    .end_frame             = &vk_dpx_end_frame,
+    .free_frame_priv       = &vk_dpx_free_frame_priv,
+    .frame_priv_data_size  = sizeof(DPXVulkanDecodePicture),
+    .init                  = &vk_decode_dpx_init,
+    .update_thread_context = &ff_vk_update_thread_context,
+    .decode_params         = &ff_vk_params_invalidate,
+    .flush                 = &ff_vk_decode_flush,
+    .uninit                = &ff_vk_decode_uninit,
+    .frame_params          = &ff_vk_frame_params,
+    .priv_data_size        = sizeof(FFVulkanDecodeContext),
+    .caps_internal         = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
+};
-- 
2.49.1

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to