[FFmpeg-cvslog] [ffmpeg] 05/06: prores_raw: synchronize decoder with reference implementation

Lynne via ffmpeg-cvslog Sat, 16 May 2026 20:26:38 -0700

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.


commit 9c4055296525f69f5c5a2666cfd0cddd70110649
Author:     Lynne <[email protected]>
AuthorDate: Fri May 15 05:25:19 2026 +0900
Commit:     Lynne <[email protected]>
CommitDate: Sun May 17 12:17:16 2026 +0900

    prores_raw: synchronize decoder with reference implementation
    
    This completes the reverse engineering of the decoder.
    The commit applies the linearization curve from the previous patch.
---
 libavcodec/prores_raw.c                     | 19 ++++++-----
 libavcodec/prores_raw_parser.c              | 21 +++---------
 libavcodec/proresdsp.c                      | 50 +++++++++++++++++++++++++----
 libavcodec/proresdsp.h                      |  3 +-
 libavcodec/vulkan/prores_raw_idct.comp.glsl | 39 ++++++++++++++--------
 libavcodec/vulkan_prores_raw.c              |  8 +++--
 6 files changed, 90 insertions(+), 50 deletions(-)

diff --git a/libavcodec/prores_raw.c b/libavcodec/prores_raw.c
index f4a1bd03ad..68a97abf56 100644
--- a/libavcodec/prores_raw.c
+++ b/libavcodec/prores_raw.c
@@ -45,15 +45,19 @@ static av_cold int decode_init(AVCodecContext *avctx)
 {
     ProResRAWContext *s = avctx->priv_data;
 
-    avctx->bits_per_raw_sample = 12;
+    /* The codec outputs linear data, with the transfer function of the
+     * camera and any adjustments built into an 8-point linearization curve */
+    avctx->bits_per_raw_sample = 16;
+    avctx->color_trc = AVCOL_TRC_LINEAR;
     avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
-    avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
     avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
 
     s->pix_fmt = AV_PIX_FMT_NONE;
 
     ff_blockdsp_init(&s->bdsp);
-    ff_proresdsp_init(&s->prodsp, avctx->bits_per_raw_sample);
+    /* Coefficients and the iDCT are 12-bit, the linearization curve then
+     * expands the result to the 16-bit linear output range. */
+    ff_proresdsp_init(&s->prodsp, 12);
 
     ff_permute_scantable(s->scan, ff_prores_interlaced_scan, s->prodsp.idct_permutation);
 
@@ -137,7 +141,7 @@ static int decode_comp(AVCodecContext *avctx, TileContext *tile,
     const int block_mask = nb_blocks - 1;
     const int nb_codes   = 64 * nb_blocks;
 
-    LOCAL_ALIGNED_32(int16_t, block, [64*16]);
+    LOCAL_ALIGNED_32(int32_t, block, [64*16]);
 
     int16_t sign = 0;
     int16_t dc_add = 0;
@@ -158,8 +162,7 @@ static int decode_comp(AVCodecContext *avctx, TileContext *tile,
     if ((ret = init_get_bits8(&gb, data, size)) < 0)
         return ret;
 
-    for (int n = 0; n < nb_blocks; n++)
-        s->bdsp.clear_block(block + n*64);
+    memset(block, 0, nb_blocks * 64 * sizeof(*block));
 
     /* Special handling for first block */
     int dc = get_value(&gb, 700);
@@ -234,7 +237,7 @@ static int decode_comp(AVCodecContext *avctx, TileContext *tile,
 
     for (int n = 0; n < nb_blocks; n++) {
         uint16_t *ptr = dst + n*16;
-        s->prodsp.idct_put_bayer(ptr, linesize, block + n*64, qmat);
+        s->prodsp.idct_put_bayer(ptr, linesize, block + n*64, qmat, s->lin_curve);
     }
 
     return 0;
@@ -265,7 +268,7 @@ static int decode_tile(AVCodecContext *avctx, TileContext *tile,
         return AVERROR_INVALIDDATA;
 
     for (int i = 0; i < 64; i++)
-        qmat[i] = s->qmat[i] * scale >> 1;
+        qmat[i] = s->qmat[i] * scale;
 
     const uint8_t *comp_start = gb->buffer_start + header_len;
 
diff --git a/libavcodec/prores_raw_parser.c b/libavcodec/prores_raw_parser.c
index a32e4cf394..c75c3a30d4 100644
--- a/libavcodec/prores_raw_parser.c
+++ b/libavcodec/prores_raw_parser.c
@@ -62,24 +62,11 @@ static int prores_raw_parse(AVCodecParserContext *s, AVCodecContext *avctx,
     }
 
     /* Vendor header (e.g. "peac" for Panasonic or "atm0" for Atmos) */
-    switch (bytestream2_get_be32(&gb)) {
-    case MKBETAG('p','e','a','c'):
-        /* Internal recording from a Panasonic camera, V-Log */
-        avctx->color_primaries = AVCOL_PRI_V_GAMUT;
-        avctx->color_trc = AVCOL_TRC_V_LOG;
-        break;
-    case MKBETAG('a','t','m','0'):
-        /* External recording from an Atomos recorder. Cameras universally
-         * record in their own native log curve internally, but linearize it
-         * when outputting RAW externally */
-        avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
-        avctx->color_trc = AVCOL_TRC_LINEAR;
-        break;
-    default:
-        avctx->color_trc = AVCOL_TRC_UNSPECIFIED;
-        break;
-    };
+    bytestream2_skip(&gb, 4);
 
+    avctx->colorspace = AVCOL_SPC_UNSPECIFIED;
+    avctx->color_primaries = AVCOL_PRI_UNSPECIFIED;
+    avctx->color_trc = AVCOL_TRC_LINEAR;
     s->width = bytestream2_get_be16(&gb);
     s->height = bytestream2_get_be16(&gb);
     s->coded_width  = FFALIGN(s->width, 16);
diff --git a/libavcodec/proresdsp.c b/libavcodec/proresdsp.c
index eb5dbf4799..cf4aa9d0cf 100644
--- a/libavcodec/proresdsp.c
+++ b/libavcodec/proresdsp.c
@@ -40,6 +40,14 @@
 #define BIT_DEPTH 12
 #include "simple_idct_template.c"
 #undef BIT_DEPTH
+#undef IN_IDCT_DEPTH
+
+/* 32bit iDCT for the ProRes RAW */
+#define IN_IDCT_DEPTH 32
+#define BIT_DEPTH 12
+#include "simple_idct_template.c"
+#undef BIT_DEPTH
+#undef IN_IDCT_DEPTH
 
 /**
  * Special version of ff_simple_idct_int16_10bit() which does dequantization
@@ -74,6 +82,24 @@ static void prores_idct_12(int16_t *restrict block, const int16_t *restrict qmat
     }
 }
 
+/*
+ * 32-bit iDCT for the ProRes RAW
+ * qmat must be s->qmat[i] * scale
+ */
+static void prores_idct_bayer_32(int32_t *restrict block, const int16_t *restrict qmat)
+{
+    for (int i = 0; i < 64; i++)
+        block[i] = (block[i] * qmat[i]) >> 1;
+
+    for (int i = 0; i < 8; i++)
+        idctRowCondDC_int32_12bit(block + i*8, 0);
+
+    for (int i = 0; i < 8; i++) {
+        block[i] += 8192;
+        idctSparseCol_int32_12bit(block + i);
+    }
+}
+
 #define CLIP_MIN (1 << 2)                     ///< minimum value for clipping resulting pixels
 #define CLIP_MAX_10 (1 << 10) - CLIP_MIN - 1  ///< maximum value for clipping resulting pixels
 #define CLIP_MAX_12 (1 << 12) - CLIP_MIN - 1  ///< maximum value for clipping resulting pixels
@@ -99,12 +125,21 @@ static inline void put_pixel(uint16_t *dst, ptrdiff_t linesize, const int16_t *i
     }
 }
 
-static inline void put_pixel_bayer_12(uint16_t *dst, ptrdiff_t linesize,
-                                      const int16_t *in)
+/* Apply the 8-point combined linearization curve (inv. transfer fn + encoder shaping) */
+static inline void put_pixel_bayer_lin_curve_12(uint16_t *dst, ptrdiff_t linesize,
+                                                const int32_t *in, const uint16_t *lin_curve)
 {
     for (int y = 0; y < 8; y++, dst += linesize) {
-        for (int x = 0; x < 8; x++)
-            dst[x*2] = CLIP_12(in[(y << 3) + x]) << 4;
+        for (int x = 0; x < 8; x++) {
+            /* Convert the 32-bit input into 16-bits (lrintf(x*16 - 15.5f) = 16) */
+            int u = av_clip_uint16(in[(y << 3) + x]*16 - 16);
+            uint32_t seg  = (uint32_t)u >> 13;
+            uint32_t frac = (uint32_t)u & 0x1FFF;
+            uint32_t cp0  = lin_curve[seg];
+            uint32_t cp1  = seg < 7 ? lin_curve[seg + 1] : 0;
+            uint32_t o    = (cp0 * 8192 + ((cp1 - cp0) & 0xFFFF) * frac + 4096) >> 13;
+            dst[x*2]      = FFMIN(o, 0xFFFF);
+        }
     }
 }
 
@@ -131,10 +166,11 @@ static void prores_idct_put_12_c(uint16_t *out, ptrdiff_t linesize, int16_t *blo
 }
 
 static void prores_idct_put_bayer_12_c(uint16_t *out, ptrdiff_t linesize,
-                                       int16_t *block, const int16_t *qmat)
+                                       int32_t *block, const int16_t *qmat,
+                                       const uint16_t *lin_curve)
 {
-    prores_idct_12(block, qmat);
-    put_pixel_bayer_12(out, linesize << 1, block);
+    prores_idct_bayer_32(block, qmat);
+    put_pixel_bayer_lin_curve_12(out, linesize << 1, block, lin_curve);
 }
 
 av_cold void ff_proresdsp_init(ProresDSPContext *dsp, int bits_per_raw_sample)
diff --git a/libavcodec/proresdsp.h b/libavcodec/proresdsp.h
index f8b57d7e87..75c782fb56 100644
--- a/libavcodec/proresdsp.h
+++ b/libavcodec/proresdsp.h
@@ -30,7 +30,8 @@ typedef struct ProresDSPContext {
     int idct_permutation_type;
     uint8_t idct_permutation[64];
     void (*idct_put)(uint16_t *out, ptrdiff_t linesize, int16_t *block, const int16_t *qmat);
-    void (*idct_put_bayer)(uint16_t *out, ptrdiff_t linesize, int16_t *block, const int16_t *qmat);
+    void (*idct_put_bayer)(uint16_t *out, ptrdiff_t linesize, int32_t *block, const int16_t *qmat,
+                           const uint16_t *lin_curve);
 } ProresDSPContext;
 
 void ff_proresdsp_init(ProresDSPContext *dsp, int bits_per_raw_sample);
diff --git a/libavcodec/vulkan/prores_raw_idct.comp.glsl b/libavcodec/vulkan/prores_raw_idct.comp.glsl
index 3393ea3402..dcc8626d48 100644
--- a/libavcodec/vulkan/prores_raw_idct.comp.glsl
+++ b/libavcodec/vulkan/prores_raw_idct.comp.glsl
@@ -41,6 +41,7 @@ layout (set = 0, binding = 1, scalar) readonly buffer frame_data_buf {
 layout (push_constant, scalar) uniform pushConstants {
    u8buf pkt_data;
    uint8_t qmat[64];
+   uint16_t lin_curve[8];
 };
 
 #define COMP_ID (gl_LocalInvocationID.z)
@@ -67,6 +68,7 @@ const u8vec2 scan[64] = {
 };
 
 shared uint8_t qmat_buf[64];
+shared uint lin_curve_buf[8];
 
 void main(void)
 {
@@ -75,32 +77,30 @@ void main(void)
 
     uint64_t pkt_offset = uint64_t(pkt_data) + td.offset;
     u8vec2buf hdr_data = u8vec2buf(pkt_offset);
-    int qscale = pack16(hdr_data[0].v.yx);
+    int qscale = int(hdr_data[0].v.y);
 
     const ivec2 offs = td.pos + ivec2(COMP_ID & 1, COMP_ID >> 1);
     const uint nb_blocks = 1 << td.log2_nb_blocks;
 
-    /* Copy push-constant qmat into shared memory for fast non-uniform access */
-    if (gl_LocalInvocationIndex < 64)
-        qmat_buf[gl_LocalInvocationIndex] = qmat[gl_LocalInvocationIndex];
+    if (gl_LocalInvocationIndex == 0) {
+        [[unroll]] for (uint i = 0; i < 64; i++) qmat_buf[i]      = qmat[i];
+        [[unroll]] for (uint i = 0; i < 8;  i++) lin_curve_buf[i] = uint(lin_curve[i]);
+    }
     barrier();
 
     [[unroll]]
     for (uint y = 0; y < 8; y++) {
         uint block_off = y*8 + ROW_ID;
         int v = int(imageLoad(dst, offs + 2*ivec2(BLOCK_ID*8, 0) + scan[block_off])[0]);
-        float vf = float(sign_extend(v, 16)) / 32768.0;
-        vf *= qmat_buf[block_off] * qscale;
-        blocks[BLOCK_ID][COMP_ID*72 + y*9 + ROW_ID] = (vf / (64*4.56)) *
-                                                      idct_scale[block_off];
+        /* Dequantize (coeff * qmat * qscale), matching the reference decoder */
+        float vf = float(sign_extend(v, 16)) * float(qmat_buf[block_off]) * float(qscale);
+        blocks[BLOCK_ID][COMP_ID*72 + y*9 + ROW_ID] = vf * idct_scale[block_off];
     }
 
     /* Column-wise iDCT */
     idct8(BLOCK_ID, COMP_ID*72 + ROW_ID, 9);
     barrier();
 
-    blocks[BLOCK_ID][COMP_ID*72 + ROW_ID * 9] += 0.5f;
-
     /* Row-wise iDCT */
     idct8(BLOCK_ID, COMP_ID*72 + ROW_ID * 9, 1);
     barrier();
@@ -111,11 +111,22 @@ void main(void)
 
     [[unroll]]
     for (uint y = 0; y < 8; y++) {
-        int v = int(round(blocks[BLOCK_ID][COMP_ID*72 + y*9 + ROW_ID]*4095.0));
-        v = clamp(v, 0, 4095);
-        v <<= 4;
+        /* Bias the signed iDCT output into the reference's unsigned 16-bit space */
+        int u = clamp(int(round(blocks[BLOCK_ID][COMP_ID*72 + y*9 + ROW_ID])) + 32768,
+                      0, 65535);
+
+        /* 8-point combined linearization curve (inv. transfer fn +
+         * encoder-defined shaping). cp1 - cp0 is the segment slope; for the
+         * final segment cp[8] == 0. */
+        uint seg  = uint(u) >> 13;
+        uint frac = uint(u) & 0x1FFFu;
+        uint cp0  = lin_curve_buf[seg];
+        uint cp1  = seg < 7u ? lin_curve_buf[seg + 1u] : 0u;
+        uint outv = (cp0 * 8192u + ((cp1 - cp0) & 0xFFFFu) * frac + 4096u) >> 13u;
+        outv = min(outv, 0xFFFFu);
+
         imageStore(dst,
                    offs + 2*ivec2(BLOCK_ID*8 + ROW_ID, y),
-                   ivec4(v));
+                   ivec4(outv));
     }
 }
diff --git a/libavcodec/vulkan_prores_raw.c b/libavcodec/vulkan_prores_raw.c
index 953b67d592..b6314ab693 100644
--- a/libavcodec/vulkan_prores_raw.c
+++ b/libavcodec/vulkan_prores_raw.c
@@ -52,6 +52,7 @@ typedef struct ProResRAWVulkanDecodeContext {
 typedef struct DecodePushData {
     VkDeviceAddress pkt_data;
     uint8_t  qmat[64];
+    uint16_t lin_curve[8];
 } DecodePushData;
 
 typedef struct TileData {
@@ -232,9 +233,10 @@ static int vk_prores_raw_end_frame(AVCodecContext *avctx)
         .pkt_data = slices_buf->address,
     };
     memcpy(pd_decode.qmat, prr->qmat, 64);
+    memcpy(pd_decode.lin_curve, prr->lin_curve, sizeof(pd_decode.lin_curve));
     ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader,
                                    VK_SHADER_STAGE_COMPUTE_BIT,
-                                   0, sizeof(pd_decode) - 64, &pd_decode);
+                                   0, offsetof(DecodePushData, qmat), &pd_decode);
 
     vk->CmdDispatch(exec->buf, prr->nb_tw, prr->nb_th, 1);
 
@@ -302,7 +304,7 @@ static int init_decode_shader(AVCodecContext *avctx, FFVulkanContext *s,
 {
     int err;
 
-    ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData) - 64,
+    ff_vk_shader_add_push_const(shd, 0, offsetof(DecodePushData, qmat),
                                 VK_SHADER_STAGE_COMPUTE_BIT);
     ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL,
                       (uint32_t []) { 1, 4, 1 }, 0);
@@ -338,7 +340,7 @@ static int init_idct_shader(AVCodecContext *avctx, FFVulkanContext *s,
     };
     for (int i = 0; i < 64; i++)
         SPEC_LIST_ADD(sl, 18 + i, 32,
-                      av_float2int(idct_8_scales[i >> 3]*idct_8_scales[i & 7]));
+                      av_float2int(8*idct_8_scales[i >> 3]*idct_8_scales[i & 7]));
 
     ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
                       (uint32_t []) { 8, nb_blocks, 4 }, 0);

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 05/06: prores_raw: synchronize decoder with reference implementation

Reply via email to