[FFmpeg-devel] [PR] swscale/vulkan: support interlaced filtering (PR #23236)

Lynne via ffmpeg-devel Mon, 25 May 2026 22:38:06 -0700

PR #23236 opened by Lynne
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23236
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23236.patch



From 49699875374daad50daa07e6f2b055a4dc57b9a2 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Tue, 26 May 2026 00:46:24 +0900
Subject: [PATCH 1/2] swscale/vulkan: implement filtered reads for the SPIRV
 backend

Same as GLSL, but much more annoying due to descriptors.
---
 libswscale/vulkan/ops.c    | 195 ++++++++++++++++++++++++++++++++++---
 libswscale/vulkan/spvasm.h |   1 +
 2 files changed, 180 insertions(+), 16 deletions(-)

diff --git a/libswscale/vulkan/ops.c b/libswscale/vulkan/ops.c
index c623d1663e..c3bbd52d3c 100644
--- a/libswscale/vulkan/ops.c
+++ b/libswscale/vulkan/ops.c
@@ -290,6 +290,25 @@ struct DitherData {
         int struct_ptr_id;
         int id;
         int mask_id;
+        int binding;
+};
+
+struct FilterData {
+    SwsOpType filter;
+    int filter_size;
+    int dst_size;
+    int num_weights;
+
+    int arr_w_in_id;
+    int arr_w_out_id;
+    int arr_o_id;
+    int struct_id;
+    int struct_ptr_id;
+
+    int id; /* buffer ID */
+    int binding; /* descriptor idx in desc set 1 */
+
+    int tap_const_base;
 };
 
 typedef struct SPIRVIDs {
@@ -334,6 +353,10 @@ typedef struct SPIRVIDs {
     int dither_ptr_elem_id;
     int nb_dither_bufs;
 
+    struct FilterData filt[MAX_FILT_BUFS];
+    int filt_o_ptr_id;
+    int nb_filter_bufs;
+
     int out_img_type;
     int out_img_array_id;
 
@@ -370,7 +393,8 @@ static void define_shader_header(SwsContext *sws, 
FFVulkanShader *shd, SwsOpList
 
     /* Entrypoint */
     id->ep = spi_OpEntryPoint(spi, SpvExecutionModelGLCompute, "main",
-                                id->in_vars, 3 + id->nb_dither_bufs);
+                                id->in_vars,
+                                3 + id->nb_dither_bufs + id->nb_filter_bufs);
     spi_OpExecutionMode(spi, id->ep, SpvExecutionModeLocalSize,
                         shd->lg_size, 3);
 
@@ -396,7 +420,24 @@ static void define_shader_header(SwsContext *sws, 
FFVulkanShader *shd, SwsOpList
         spi_OpDecorate(spi, id->dither[i].struct_id, SpvDecorationBlock);
         spi_OpMemberDecorate(spi, id->dither[i].struct_id, 0, 
SpvDecorationOffset, 0);
         spi_OpDecorate(spi, id->dither[i].id, SpvDecorationDescriptorSet, 1);
-        spi_OpDecorate(spi, id->dither[i].id, SpvDecorationBinding, i);
+        spi_OpDecorate(spi, id->dither[i].id, SpvDecorationBinding,
+                       id->dither[i].binding);
+    }
+
+    for (int i = 0; i < id->nb_filter_bufs; i++) {
+        struct FilterData *f = &id->filt[i];
+        spi_OpDecorate(spi, f->arr_w_in_id, SpvDecorationArrayStride,
+                       sizeof(float));
+        spi_OpDecorate(spi, f->arr_w_out_id, SpvDecorationArrayStride,
+                       f->filter_size*sizeof(float));
+        spi_OpDecorate(spi, f->arr_o_id, SpvDecorationArrayStride,
+                       sizeof(int32_t));
+        spi_OpDecorate(spi, f->struct_id, SpvDecorationBlock);
+        spi_OpMemberDecorate(spi, f->struct_id, 0, SpvDecorationOffset, 0);
+        spi_OpMemberDecorate(spi, f->struct_id, 1, SpvDecorationOffset,
+                             f->num_weights*sizeof(float));
+        spi_OpDecorate(spi, f->id, SpvDecorationDescriptorSet, 1);
+        spi_OpDecorate(spi, f->id, SpvDecorationBinding, f->binding);
     }
 
     if (!(sws->flags & SWS_BITEXACT))
@@ -605,6 +646,32 @@ static void define_shader_bindings(SwsOpList *ops, SPICtx 
*spi, SPIRVIDs *id,
                                       SpvStorageClassUniform, 0);
     }
 
+    /* Filter buffers: struct { float w[dst_size][filter_size]; int 
o[dst_size]; } */
+    id->filt_o_ptr_id = 0;
+    if (id->nb_filter_bufs)
+        id->filt_o_ptr_id = spi_OpTypePointer(spi, SpvStorageClassUniform,
+                                              id->i32_type);
+
+    for (int i = 0; i < id->nb_filter_bufs; i++) {
+        struct FilterData *f = &id->filt[i];
+        int fs_id = spi_OpConstantUInt(spi, id->u32_type, f->filter_size);
+        int ds_id = spi_OpConstantUInt(spi, id->u32_type, f->dst_size);
+
+        spi_OpTypeArray(spi, id->f32_type, f->arr_w_in_id, fs_id);
+        spi_OpTypeArray(spi, f->arr_w_in_id, f->arr_w_out_id, ds_id);
+        spi_OpTypeArray(spi, id->i32_type, f->arr_o_id, ds_id);
+        spi_OpTypeStruct(spi, f->struct_id, f->arr_w_out_id, f->arr_o_id);
+        f->struct_ptr_id = spi_OpTypePointer(spi, SpvStorageClassUniform,
+                                             f->struct_id);
+        f->id = spi_OpVariable(spi, f->id, f->struct_ptr_id,
+                               SpvStorageClassUniform, 0);
+
+        /* Signed tap-index constants 0..filter_size-1 (consecutive <id>s) */
+        f->tap_const_base = spi_OpConstantInt(spi, id->i32_type, 0);
+        for (int t = 1; t < f->filter_size; t++)
+            spi_OpConstantInt(spi, id->i32_type, t);
+    }
+
     const SwsOp *op_w = ff_sws_op_list_output(ops);
     const SwsOp *op_r = ff_sws_op_list_input(ops);
 
@@ -718,6 +785,70 @@ static int insert_bitexact_linear(const SwsOp *op, SPICtx 
*spi, SPIRVIDs *id,
                                     res[0], res[1], res[2], res[3]);
 }
 
+static int read_filtered(SPICtx *spi, SPIRVIDs *id, const SwsOpList *ops,
+                         const SwsOp *op, const struct FilterData *f,
+                         const int *in_img, int gid, int gi2)
+{
+    const int is_h = f->filter == SWS_OP_FILTER_H;
+
+    /* Buffer array index along the filtered axis: pos.x (H) or pos.y (V) */
+    int axis = spi_OpCompositeExtract(spi, id->u32_type, gid, is_h ? 0 : 1);
+
+    /* int o = filter_o[axis]; */
+    int o_ptr = spi_OpAccessChain(spi, id->filt_o_ptr_id, f->id,
+                                  id->u32_cid[1], axis);
+    int o = spi_OpLoad(spi, id->i32_type, o_ptr, SpvMemoryAccessMaskNone, 0);
+
+    /* Signed pixel position, for the non-filtered coordinate axis */
+    int pos_x = spi_OpCompositeExtract(spi, id->i32_type, gi2, 0);
+    int pos_y = spi_OpCompositeExtract(spi, id->i32_type, gi2, 1);
+
+    /* Accumulators, initialized to zero */
+    int acc_s[4] = { id->f32_0, id->f32_0, id->f32_0, id->f32_0 };
+    int acc_v = id->f32_0;
+    if (op->rw.packed)
+        acc_v = spi_OpCompositeConstruct(spi, id->f32vec4_type,
+                                         id->f32_0, id->f32_0,
+                                         id->f32_0, id->f32_0);
+
+    for (int t = 0; t < f->filter_size; t++) {
+        /* float w = filter_w[axis][t]; */
+        int w_ptr = spi_OpAccessChain(spi, id->dither_ptr_elem_id, f->id,
+                                      id->u32_cid[0], axis,
+                                      f->tap_const_base + t);
+        int w = spi_OpLoad(spi, id->f32_type, w_ptr,
+                           SpvMemoryAccessMaskNone, 0);
+
+        /* Source coordinate, filtered axis offset by the tap index */
+        int c = t ? spi_OpIAdd(spi, id->i32_type, o, f->tap_const_base + t) : 
o;
+        int coord = is_h ?
+            spi_OpCompositeConstruct(spi, id->i32vec2_type, c, pos_y) :
+            spi_OpCompositeConstruct(spi, id->i32vec2_type, pos_x, c);
+
+        if (op->rw.packed) {
+            int px = spi_OpImageRead(spi, id->f32vec4_type,
+                                     in_img[ops->plane_src[0]], coord,
+                                     SpvImageOperandsMaskNone);
+            px = spi_OpVectorTimesScalar(spi, id->f32vec4_type, px, w);
+            acc_v = spi_OpFAdd(spi, id->f32vec4_type, acc_v, px);
+        } else {
+            for (int e = 0; e < op->rw.elems; e++) {
+                int px = spi_OpImageRead(spi, id->f32vec4_type,
+                                         in_img[ops->plane_src[e]], coord,
+                                         SpvImageOperandsMaskNone);
+                px = spi_OpCompositeExtract(spi, id->f32_type, px, 0);
+                px = spi_OpFMul(spi, id->f32_type, w, px);
+                acc_s[e] = spi_OpFAdd(spi, id->f32_type, acc_s[e], px);
+            }
+        }
+    }
+
+    if (op->rw.packed)
+        return acc_v;
+    return spi_OpCompositeConstruct(spi, id->f32vec4_type,
+                                    acc_s[0], acc_s[1], acc_s[2], acc_s[3]);
+}
+
 static int add_ops_spirv(SwsContext *sws, VulkanPriv *p, FFVulkanOpsCtx *s,
                          SwsOpList *ops, FFVulkanShader *shd)
 {
@@ -763,33 +894,60 @@ static int add_ops_spirv(SwsContext *sws, VulkanPriv *p, 
FFVulkanOpsCtx *s,
     if (err < 0)
         return err;
 
-    /* Entrypoint inputs: gl_GlobalInvocationID, input and output images, 
dither */
+    /* Entrypoint inputs: gl_GlobalInvocationID, input and output images, 
dither */
     id->in_vars[0] = spi_get_id(spi);
     id->in_vars[1] = spi_get_id(spi);
     id->in_vars[2] = spi_get_id(spi);
 
-    /* Create dither buffer descriptor set */
+    /* Create dither and filter buffer descriptor set. Both are collected in
+     * op order, so the bindings match the buffer order from create_bufs(). */
     id->nb_dither_bufs = 0;
+    id->nb_filter_bufs = 0;
+    int nb_data_bufs = 0;
     for (int n = 0; n < ops->num_ops; n++) {
         const SwsOp *op = &ops->ops[n];
-        if (op->op != SWS_OP_DITHER)
+        int var_id = 0;
+
+        if (op->op == SWS_OP_DITHER) {
+            if (id->nb_dither_bufs >= MAX_DITHER_BUFS)
+                return AVERROR(ENOTSUP);
+            struct DitherData *d = &id->dither[id->nb_dither_bufs++];
+            d->size      = 1 << op->dither.size_log2;
+            d->arr_1d_id = spi_get_id(spi);
+            d->arr_2d_id = spi_get_id(spi);
+            d->struct_id = spi_get_id(spi);
+            d->id        = spi_get_id(spi);
+            d->binding   = nb_data_bufs;
+            var_id       = d->id;
+        } else if (op->op == SWS_OP_READ && op->rw.filter) {
+            if (id->nb_filter_bufs >= MAX_FILT_BUFS)
+                return AVERROR(ENOTSUP);
+            const SwsFilterWeights *wd = op->rw.kernel;
+            struct FilterData *f = &id->filt[id->nb_filter_bufs++];
+            f->filter       = op->rw.filter;
+            f->filter_size  = wd->filter_size;
+            f->dst_size     = wd->dst_size;
+            f->num_weights  = wd->num_weights;
+            f->arr_w_in_id  = spi_get_id(spi);
+            f->arr_w_out_id = spi_get_id(spi);
+            f->arr_o_id     = spi_get_id(spi);
+            f->struct_id    = spi_get_id(spi);
+            f->id           = spi_get_id(spi);
+            f->binding      = nb_data_bufs;
+            var_id          = f->id;
+        } else {
             continue;
+        }
 
-        id->dither[id->nb_dither_bufs].size = 1 << op->dither.size_log2;
-        id->dither[id->nb_dither_bufs].arr_1d_id = spi_get_id(spi);
-        id->dither[id->nb_dither_bufs].arr_2d_id = spi_get_id(spi);
-        id->dither[id->nb_dither_bufs].struct_id = spi_get_id(spi);
-        id->dither[id->nb_dither_bufs].id = spi_get_id(spi);
-        id->in_vars[3 + id->nb_dither_bufs] = 
id->dither[id->nb_dither_bufs].id;
-
-        desc_set[id->nb_dither_bufs++] = (FFVulkanDescriptorSetBinding) {
+        id->in_vars[3 + nb_data_bufs] = var_id;
+        desc_set[nb_data_bufs++] = (FFVulkanDescriptorSetBinding) {
             .type        = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
             .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
         };
     }
-    if (id->nb_dither_bufs)
+    if (nb_data_bufs)
         ff_vk_shader_add_descriptor_set(&s->vkctx, shd, desc_set,
-                                        id->nb_dither_bufs, 1, 0);
+                                        nb_data_bufs, 1, 0);
 
     /* Define shader header sections */
     define_shader_header(sws, shd, ops, spi, id);
@@ -856,6 +1014,7 @@ static int add_ops_spirv(SwsContext *sws, VulkanPriv *p, 
FFVulkanOpsCtx *s,
     int nb_const_ids = 0;
     int nb_dither_bufs = 0;
     int nb_linear_ops = 0;
+    int nb_filter_used = 0;
 
     /* Operations */
     for (int n = 0; n < ops->num_ops; n++) {
@@ -871,8 +1030,12 @@ static int add_ops_spirv(SwsContext *sws, VulkanPriv *p, 
FFVulkanOpsCtx *s,
 
         switch (op->op) {
         case SWS_OP_READ:
-            if (op->rw.frac || op->rw.filter) {
+            if (op->rw.frac) {
                 return AVERROR(ENOTSUP);
+            } else if (op->rw.filter) {
+                data = read_filtered(spi, id, ops, op,
+                                     &id->filt[nb_filter_used++],
+                                     in_img, gid, gi2);
             } else if (op->rw.packed) {
                 data = spi_OpImageRead(spi, type_v, in_img[ops->plane_src[0]],
                                        gid, SpvImageOperandsMaskNone);
diff --git a/libswscale/vulkan/spvasm.h b/libswscale/vulkan/spvasm.h
index 372163f7bd..b671292987 100644
--- a/libswscale/vulkan/spvasm.h
+++ b/libswscale/vulkan/spvasm.h
@@ -324,6 +324,7 @@ OP_RES2SRC(OpIAdd, 128)
 OP_RES2SRC(OpFAdd, 129)
 OP_RES2SRC(OpIMul, 132)
 OP_RES2SRC(OpFMul, 133)
+OP_RES2SRC(OpVectorTimesScalar, 142)
 OP_RES2SRC(OpMatrixTimesVector, 145)
 OP_RES2SRC(OpDot, 148)
 
-- 
2.52.0


From a31a2a9b62247e70c57ef7533d77fdbd76780ad5 Mon Sep 17 00:00:00 2001
From: Lynne <[email protected]>
Date: Tue, 26 May 2026 14:33:13 +0900
Subject: [PATCH 2/2] swscale/vulkan: support interlaced filtering

Sponsored-by: Sovereign Tech Fund
---
 libswscale/vulkan/ops.c | 165 +++++++++++++++++++++++++++++++++-------
 1 file changed, 139 insertions(+), 26 deletions(-)

diff --git a/libswscale/vulkan/ops.c b/libswscale/vulkan/ops.c
index c3bbd52d3c..d70192e4c7 100644
--- a/libswscale/vulkan/ops.c
+++ b/libswscale/vulkan/ops.c
@@ -21,6 +21,7 @@
 #include "libavutil/mem.h"
 #include "libavutil/refstruct.h"
 
+#include "../graph.h"
 #include "../ops_internal.h"
 #include "../swscale_internal.h"
 
@@ -101,6 +102,7 @@ typedef struct VulkanPriv {
     int nb_data_bufs;
     enum FFVkShaderRepFormat src_rep;
     enum FFVkShaderRepFormat dst_rep;
+    int interlaced;
 } VulkanPriv;
 
 static void process(const SwsFrame *dst, const SwsFrame *src, int y, int h,
@@ -150,11 +152,18 @@ static void process(const SwsFrame *dst, const SwsFrame 
*src, int y, int h,
         .imageMemoryBarrierCount = nb_img_bar,
     });
 
+    if (p->interlaced) {
+        uint32_t field = pass->graph ? pass->graph->field : 0;
+        ff_vk_shader_update_push_const(&p->s->vkctx, ec, &p->shd,
+                                       VK_SHADER_STAGE_COMPUTE_BIT,
+                                       0, sizeof(field), &field);
+    }
+
     ff_vk_exec_bind_shader(&p->s->vkctx, ec, &p->shd);
 
     vk->CmdDispatch(ec->buf,
-                    FFALIGN(dst_f->width, p->shd.lg_size[0])/p->shd.lg_size[0],
-                    FFALIGN(dst_f->height, 
p->shd.lg_size[1])/p->shd.lg_size[1],
+                    FFALIGN(dst->width, p->shd.lg_size[0])/p->shd.lg_size[0],
+                    FFALIGN(dst->height, p->shd.lg_size[1])/p->shd.lg_size[1],
                     1);
 
     ff_vk_exec_submit(&p->s->vkctx, ec);
@@ -312,7 +321,7 @@ struct FilterData {
 };
 
 typedef struct SPIRVIDs {
-    int in_vars[3 + MAX_DATA_BUFS];
+    int in_vars[3 + MAX_DATA_BUFS + 1];
 
     int glfn;
     int ep;
@@ -370,6 +379,14 @@ typedef struct SPIRVIDs {
 
     int in_img_tptr;
     int in_img_sptr;
+
+    /* Interlaced handling */
+    int interlaced;
+    int push_const_struct_id;
+    int push_const_ptr_id;
+    int push_const_elem_ptr_id;
+    int push_const_var_id;
+    int field_i32;
 } SPIRVIDs;
 
 /* Section 1: Function to define all shader header data, and decorations */
@@ -394,10 +411,17 @@ static void define_shader_header(SwsContext *sws, 
FFVulkanShader *shd, SwsOpList
     /* Entrypoint */
     id->ep = spi_OpEntryPoint(spi, SpvExecutionModelGLCompute, "main",
                                 id->in_vars,
-                                3 + id->nb_dither_bufs + id->nb_filter_bufs);
+                                3 + id->nb_dither_bufs + id->nb_filter_bufs +
+                                (id->interlaced ? 1 : 0));
     spi_OpExecutionMode(spi, id->ep, SpvExecutionModeLocalSize,
                         shd->lg_size, 3);
 
+    if (id->interlaced) {
+        spi_OpDecorate(spi, id->push_const_struct_id, SpvDecorationBlock);
+        spi_OpMemberDecorate(spi, id->push_const_struct_id, 0,
+                             SpvDecorationOffset, 0);
+    }
+
     /* gl_GlobalInvocationID descriptor decorations */
     spi_OpDecorate(spi, id->in_vars[0], SpvDecorationBuiltIn,
                    SpvBuiltInGlobalInvocationId);
@@ -725,6 +749,16 @@ static void define_shader_bindings(SwsOpList *ops, SPICtx 
*spi, SPIRVIDs *id,
     }
     spi_OpVariable(spi, id->in_vars[2], id->out_img_tptr,
                    SpvStorageClassUniformConstant, 0);
+
+    if (id->interlaced) {
+        spi_OpTypeStruct(spi, id->push_const_struct_id, id->u32_type);
+        id->push_const_ptr_id = spi_OpTypePointer(spi, 
SpvStorageClassPushConstant,
+                                                  id->push_const_struct_id);
+        id->push_const_elem_ptr_id = spi_OpTypePointer(spi, 
SpvStorageClassPushConstant,
+                                                       id->u32_type);
+        spi_OpVariable(spi, id->push_const_var_id, id->push_const_ptr_id,
+                       SpvStorageClassPushConstant, 0);
+    }
 }
 
 static int insert_vmat_linear(const SwsOp *op, SPICtx *spi, SPIRVIDs *id,
@@ -790,6 +824,7 @@ static int read_filtered(SPICtx *spi, SPIRVIDs *id, const 
SwsOpList *ops,
                          const int *in_img, int gid, int gi2)
 {
     const int is_h = f->filter == SWS_OP_FILTER_H;
+    const int src_interlaced = ops->src.interlaced;
 
     /* Buffer array index along the filtered axis: pos.x (H) or pos.y (V) */
     int axis = spi_OpCompositeExtract(spi, id->u32_type, gid, is_h ? 0 : 1);
@@ -803,6 +838,13 @@ static int read_filtered(SPICtx *spi, SPIRVIDs *id, const 
SwsOpList *ops,
     int pos_x = spi_OpCompositeExtract(spi, id->i32_type, gi2, 0);
     int pos_y = spi_OpCompositeExtract(spi, id->i32_type, gi2, 1);
 
+    /* For interlaced horizontal filtering, the y coordinate of every tap is
+     * the (constant) destination y mapped into the source image. */
+    if (src_interlaced && is_h) {
+        pos_y = spi_OpShiftLeftLogical(spi, id->i32_type, pos_y, 
id->u32_cid[1]);
+        pos_y = spi_OpIAdd(spi, id->i32_type, pos_y, id->field_i32);
+    }
+
     /* Accumulators, initialized to zero */
     int acc_s[4] = { id->f32_0, id->f32_0, id->f32_0, id->f32_0 };
     int acc_v = id->f32_0;
@@ -821,6 +863,12 @@ static int read_filtered(SPICtx *spi, SPIRVIDs *id, const 
SwsOpList *ops,
 
         /* Source coordinate, filtered axis offset by the tap index */
         int c = t ? spi_OpIAdd(spi, id->i32_type, o, f->tap_const_base + t) : 
o;
+        /* For interlaced vertical filtering, the per-tap source row is
+         * field-local; map it to the actual image row. */
+        if (src_interlaced && !is_h) {
+            c = spi_OpShiftLeftLogical(spi, id->i32_type, c, id->u32_cid[1]);
+            c = spi_OpIAdd(spi, id->i32_type, c, id->field_i32);
+        }
         int coord = is_h ?
             spi_OpCompositeConstruct(spi, id->i32vec2_type, c, pos_y) :
             spi_OpCompositeConstruct(spi, id->i32vec2_type, pos_x, c);
@@ -857,14 +905,17 @@ static int add_ops_spirv(SwsContext *sws, VulkanPriv *p, 
FFVulkanOpsCtx *s,
     SPIRVIDs spid_data = { 0 }, *id = &spid_data;
     spi_init(spi, spvbuf, sizeof(spvbuf));
 
-    /* Interlaced formats are not currently supported */
-    if (ops->src.interlaced || ops->dst.interlaced)
-        return AVERROR(ENOTSUP);
+    id->interlaced = ops->src.interlaced || ops->dst.interlaced;
+    p->interlaced = id->interlaced;
 
     ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL,
                       (uint32_t []) { 32, 32, 1 }, 0);
     shd->precompiled = 0;
 
+    if (id->interlaced)
+        ff_vk_shader_add_push_const(shd, 0, sizeof(uint32_t),
+                                    VK_SHADER_STAGE_COMPUTE_BIT);
+
     /* Image ops, to determine types */
     const SwsOp *op_w = ff_sws_op_list_output(ops);
     int out_img_count = op_w->rw.packed ? 1 : op_w->rw.elems;
@@ -949,6 +1000,13 @@ static int add_ops_spirv(SwsContext *sws, VulkanPriv *p, 
FFVulkanOpsCtx *s,
         ff_vk_shader_add_descriptor_set(&s->vkctx, shd, desc_set,
                                         nb_data_bufs, 1, 0);
 
+    if (id->interlaced) {
+        id->push_const_struct_id = spi_get_id(spi);
+        id->push_const_var_id = spi_get_id(spi);
+        id->in_vars[3 + id->nb_dither_bufs + id->nb_filter_bufs] =
+            id->push_const_var_id;
+    }
+
     /* Define shader header sections */
     define_shader_header(sws, shd, ops, spi, id);
     define_shader_consts(sws, ops, spi, id);
@@ -985,9 +1043,40 @@ static int add_ops_spirv(SwsContext *sws, VulkanPriv *p, 
FFVulkanOpsCtx *s,
     gid = spi_OpVectorShuffle(spi, id->u32vec2_type, gid, gid, 0, 1);
     int gi2 = spi_OpBitcast(spi, id->i32vec2_type, gid);
 
+    /* For interlaced sources/destinations the shader operates on field-local
+     * coordinates, while images contain the full frame. Map the y axis to the
+     * actual image row: image_y = field_y * 2 + field. */
+    int dst_gid = gid, dst_gi2 = gi2;
+    int src_gid = gid;
+    if (id->interlaced) {
+        int field_u32_ptr = spi_OpAccessChain(spi, id->push_const_elem_ptr_id,
+                                              id->push_const_var_id,
+                                              id->u32_cid[0]);
+        int field_u32 = spi_OpLoad(spi, id->u32_type, field_u32_ptr,
+                                   SpvMemoryAccessMaskNone, 0);
+        id->field_i32 = spi_OpBitcast(spi, id->i32_type, field_u32);
+
+        int img_y_i32 = spi_OpShiftLeftLogical(spi, id->i32_type,
+                                               spi_OpCompositeExtract(spi, 
id->i32_type, gi2, 1),
+                                               id->u32_cid[1]);
+        img_y_i32 = spi_OpIAdd(spi, id->i32_type, img_y_i32, id->field_i32);
+
+        int gi2_x = spi_OpCompositeExtract(spi, id->i32_type, gi2, 0);
+        int mapped_gi2 = spi_OpCompositeConstruct(spi, id->i32vec2_type,
+                                                  gi2_x, img_y_i32);
+        int mapped_gid = spi_OpBitcast(spi, id->u32vec2_type, mapped_gi2);
+
+        if (ops->src.interlaced)
+            src_gid = mapped_gid;
+        if (ops->dst.interlaced) {
+            dst_gid = mapped_gid;
+            dst_gi2 = mapped_gi2;
+        }
+    }
+
     /* imageSize(out_img[0]); */
     int img1_s = spi_OpImageQuerySize(spi, id->i32vec2_type, out_img[0]);
-    int scmp = spi_OpSGreaterThanEqual(spi, id->bvec2_type, gi2, img1_s);
+    int scmp = spi_OpSGreaterThanEqual(spi, id->bvec2_type, dst_gi2, img1_s);
     scmp = spi_OpAny(spi, id->b_type, scmp);
 
     /* if (out of bounds) return */
@@ -1038,12 +1127,12 @@ static int add_ops_spirv(SwsContext *sws, VulkanPriv 
*p, FFVulkanOpsCtx *s,
                                      in_img, gid, gi2);
             } else if (op->rw.packed) {
                 data = spi_OpImageRead(spi, type_v, in_img[ops->plane_src[0]],
-                                       gid, SpvImageOperandsMaskNone);
+                                       src_gid, SpvImageOperandsMaskNone);
             } else {
                 int tmp[4] = { uid, uid, uid, uid };
                 for (int i = 0; i < op->rw.elems; i++) {
                     tmp[i] = spi_OpImageRead(spi, type_v,
-                                             in_img[ops->plane_src[i]], gid,
+                                             in_img[ops->plane_src[i]], 
src_gid,
                                              SpvImageOperandsMaskNone);
                     tmp[i] = spi_OpCompositeExtract(spi, type_s, tmp[i], 0);
                 }
@@ -1055,13 +1144,13 @@ static int add_ops_spirv(SwsContext *sws, VulkanPriv 
*p, FFVulkanOpsCtx *s,
             if (op->rw.frac || op->rw.filter) {
                 return AVERROR(ENOTSUP);
             } else if (op->rw.packed) {
-                spi_OpImageWrite(spi, out_img[ops->plane_dst[0]], gid, data,
+                spi_OpImageWrite(spi, out_img[ops->plane_dst[0]], dst_gid, 
data,
                                  SpvImageOperandsMaskNone);
             } else {
                 for (int i = 0; i < op->rw.elems; i++) {
                     int tmp = spi_OpCompositeExtract(spi, type_s, data, i);
                     tmp = spi_OpCompositeConstruct(spi, type_v, tmp, tmp, tmp, 
tmp);
-                    spi_OpImageWrite(spi, out_img[ops->plane_dst[i]], gid, tmp,
+                    spi_OpImageWrite(spi, out_img[ops->plane_dst[i]], dst_gid, 
tmp,
                                      SpvImageOperandsMaskNone);
                 }
             }
@@ -1217,10 +1306,15 @@ static void read_glsl(SwsOpList *ops, const SwsOp *op, 
FFVulkanShader *shd,
                       const char *type_v, const char *type_s)
 {
     const SwsFilterWeights *wd = op->rw.kernel;
+    const int interlaced = ops->src.interlaced;
     if (op->rw.filter) {
         const char *axis    = op->rw.filter == SWS_OP_FILTER_H ? "pos.x" : 
"pos.y";
         const char *coord_x = op->rw.filter == SWS_OP_FILTER_H ? "o + i" : 
"pos.x";
-        const char *coord_y = op->rw.filter == SWS_OP_FILTER_H ? "pos.y" : "o 
+ i";
+        const char *coord_y;
+        if (op->rw.filter == SWS_OP_FILTER_H)
+            coord_y = interlaced ? "spos.y" : "pos.y";
+        else
+            coord_y = interlaced ? "((o + i) * 2 + int(params.field))" : "o + 
i";
         GLSLC(1, tmp = vec4(0);                                               
);
         av_bprintf(&shd->src, "    int o = filter_o%i[%s];\n", idx, axis);
         av_bprintf(&shd->src, "    for (int i = 0; i < %i; i++) {\n",
@@ -1239,13 +1333,14 @@ static void read_glsl(SwsOpList *ops, const SwsOp *op, 
FFVulkanShader *shd,
         GLSLC(1, }                                                            
);
         GLSLC(1, f32 = tmp;                                                   
);
     } else {
+        const char *src_pos = interlaced ? "spos" : "pos";
         if (op->rw.packed) {
-            GLSLF(1, %s = %s(imageLoad(src_img[%i], pos));                     
,
-                  type_name, type_v, ops->plane_src[0]);
+            GLSLF(1, %s = %s(imageLoad(src_img[%i], %s));                      
,
+                  type_name, type_v, ops->plane_src[0], src_pos);
         } else {
             for (int i = 0; i < op->rw.elems; i++)
-                GLSLF(1, %s.%c = %s(imageLoad(src_img[%i], pos)[0]);           
,
-                      type_name, "xyzw"[i], type_s, ops->plane_src[i]);
+                GLSLF(1, %s.%c = %s(imageLoad(src_img[%i], %s)[0]);            
,
+                      type_name, "xyzw"[i], type_s, ops->plane_src[i], 
src_pos);
         }
     }
 }
@@ -1257,10 +1352,7 @@ static int add_ops_glsl(SwsContext *sws, VulkanPriv *p, 
FFVulkanOpsCtx *s,
     uint8_t *spv_data;
     size_t spv_len;
     void *spv_opaque = NULL;
-
-    /* Interlaced formats are not currently supported */
-    if (ops->src.interlaced || ops->dst.interlaced)
-        return AVERROR(ENOTSUP);
+    const int interlaced = ops->src.interlaced || ops->dst.interlaced;
 
     err = ff_vk_shader_init(&s->vkctx, shd, "sws_pass",
                             VK_SHADER_STAGE_COMPUTE_BIT,
@@ -1268,6 +1360,11 @@ static int add_ops_glsl(SwsContext *sws, VulkanPriv *p, 
FFVulkanOpsCtx *s,
     if (err < 0)
         return err;
 
+    p->interlaced = interlaced;
+    if (interlaced)
+        ff_vk_shader_add_push_const(shd, 0, sizeof(uint32_t),
+                                    VK_SHADER_STAGE_COMPUTE_BIT);
+
     int nb_desc = 0;
     FFVulkanDescriptorSetBinding buf_desc[8];
 
@@ -1327,11 +1424,26 @@ static int add_ops_glsl(SwsContext *sws, VulkanPriv *p, 
FFVulkanOpsCtx *s,
         ff_vk_shader_add_descriptor_set(&s->vkctx, shd, buf_desc,
                                         nb_desc, 1, 0);
 
+    if (interlaced) {
+        GLSLC(0, layout(push_constant, std430) uniform pushConstants {        
);
+        GLSLC(1,     uint field;                                              
);
+        GLSLC(0, } params;                                                    
);
+        GLSLC(0,                                                              
);
+    }
+
     GLSLC(0, void main()                                                      
);
     GLSLC(0, {                                                                
);
     GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);                 
);
     GLSLC(1,     ivec2 size = imageSize(dst_img[0]);                          
);
-    GLSLC(1,     if (any(greaterThanEqual(pos, size)))                        
);
+    if (ops->src.interlaced)
+        GLSLC(1, ivec2 spos = ivec2(pos.x, pos.y * 2 + int(params.field));    
);
+    if (ops->dst.interlaced)
+        GLSLC(1, ivec2 dpos = ivec2(pos.x, pos.y * 2 + int(params.field));    
);
+    if (ops->dst.interlaced) {
+        GLSLC(1, if (any(greaterThanEqual(dpos, size)))                       
);
+    } else {
+        GLSLC(1, if (any(greaterThanEqual(pos, size)))                        
);
+    }
     GLSLC(2,         return;                                                  
);
     GLSLC(0,                                                                  
);
     GLSLC(1,     u8vec4 u8;                                                   
);
@@ -1362,15 +1474,16 @@ static int add_ops_glsl(SwsContext *sws, VulkanPriv *p, 
FFVulkanOpsCtx *s,
             break;
         }
         case SWS_OP_WRITE: {
+            const char *dst_pos = ops->dst.interlaced ? "dpos" : "pos";
             if (op->rw.frac || op->rw.filter) {
                 return AVERROR(ENOTSUP);
             } else if (op->rw.packed) {
-                GLSLF(1, imageStore(dst_img[%i], pos, %s(%s));                 
 ,
-                      ops->plane_dst[0], type_v, type_name);
+                GLSLF(1, imageStore(dst_img[%i], %s, %s(%s));                  
 ,
+                      ops->plane_dst[0], dst_pos, type_v, type_name);
             } else {
                 for (int i = 0; i < op->rw.elems; i++)
-                    GLSLF(1, imageStore(dst_img[%i], pos, %s(%s[%i]));         
,
-                          ops->plane_dst[i], type_v, type_name, i);
+                    GLSLF(1, imageStore(dst_img[%i], %s, %s(%s[%i]));          
 ,
+                          ops->plane_dst[i], dst_pos, type_v, type_name, i);
             }
             break;
         }
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-devel] [PR] swscale/vulkan: support interlaced filtering (PR #23236)

Reply via email to