This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 9c4055296525f69f5c5a2666cfd0cddd70110649 Author: Lynne <[email protected]> AuthorDate: Fri May 15 05:25:19 2026 +0900 Commit: Lynne <[email protected]> CommitDate: Sun May 17 12:17:16 2026 +0900 prores_raw: synchronize decoder with reference implementation This completes the reverse engineering of the decoder. The commit applies the linearization curve from the previous patch. --- libavcodec/prores_raw.c | 19 ++++++----- libavcodec/prores_raw_parser.c | 21 +++--------- libavcodec/proresdsp.c | 50 +++++++++++++++++++++++++---- libavcodec/proresdsp.h | 3 +- libavcodec/vulkan/prores_raw_idct.comp.glsl | 39 ++++++++++++++-------- libavcodec/vulkan_prores_raw.c | 8 +++-- 6 files changed, 90 insertions(+), 50 deletions(-) diff --git a/libavcodec/prores_raw.c b/libavcodec/prores_raw.c index f4a1bd03ad..68a97abf56 100644 --- a/libavcodec/prores_raw.c +++ b/libavcodec/prores_raw.c @@ -45,15 +45,19 @@ static av_cold int decode_init(AVCodecContext *avctx) { ProResRAWContext *s = avctx->priv_data; - avctx->bits_per_raw_sample = 12; + /* The codec outputs linear data, with the transfer function of the + * camera and any adjustments built into an 8-point linearization curve */ + avctx->bits_per_raw_sample = 16; + avctx->color_trc = AVCOL_TRC_LINEAR; avctx->color_primaries = AVCOL_PRI_UNSPECIFIED; - avctx->color_trc = AVCOL_TRC_UNSPECIFIED; avctx->colorspace = AVCOL_SPC_UNSPECIFIED; s->pix_fmt = AV_PIX_FMT_NONE; ff_blockdsp_init(&s->bdsp); - ff_proresdsp_init(&s->prodsp, avctx->bits_per_raw_sample); + /* Coefficients and the iDCT are 12-bit, the linearization curve then + * expands the result to the 16-bit linear output range. */ + ff_proresdsp_init(&s->prodsp, 12); ff_permute_scantable(s->scan, ff_prores_interlaced_scan, s->prodsp.idct_permutation); @@ -137,7 +141,7 @@ static int decode_comp(AVCodecContext *avctx, TileContext *tile, const int block_mask = nb_blocks - 1; const int nb_codes = 64 * nb_blocks; - LOCAL_ALIGNED_32(int16_t, block, [64*16]); + LOCAL_ALIGNED_32(int32_t, block, [64*16]); int16_t sign = 0; int16_t dc_add = 0; @@ -158,8 +162,7 @@ static int decode_comp(AVCodecContext *avctx, TileContext *tile, if ((ret = init_get_bits8(&gb, data, size)) < 0) return ret; - for (int n = 0; n < nb_blocks; n++) - s->bdsp.clear_block(block + n*64); + memset(block, 0, nb_blocks * 64 * sizeof(*block)); /* Special handling for first block */ int dc = get_value(&gb, 700); @@ -234,7 +237,7 @@ static int decode_comp(AVCodecContext *avctx, TileContext *tile, for (int n = 0; n < nb_blocks; n++) { uint16_t *ptr = dst + n*16; - s->prodsp.idct_put_bayer(ptr, linesize, block + n*64, qmat); + s->prodsp.idct_put_bayer(ptr, linesize, block + n*64, qmat, s->lin_curve); } return 0; @@ -265,7 +268,7 @@ static int decode_tile(AVCodecContext *avctx, TileContext *tile, return AVERROR_INVALIDDATA; for (int i = 0; i < 64; i++) - qmat[i] = s->qmat[i] * scale >> 1; + qmat[i] = s->qmat[i] * scale; const uint8_t *comp_start = gb->buffer_start + header_len; diff --git a/libavcodec/prores_raw_parser.c b/libavcodec/prores_raw_parser.c index a32e4cf394..c75c3a30d4 100644 --- a/libavcodec/prores_raw_parser.c +++ b/libavcodec/prores_raw_parser.c @@ -62,24 +62,11 @@ static int prores_raw_parse(AVCodecParserContext *s, AVCodecContext *avctx, } /* Vendor header (e.g. "peac" for Panasonic or "atm0" for Atmos) */ - switch (bytestream2_get_be32(&gb)) { - case MKBETAG('p','e','a','c'): - /* Internal recording from a Panasonic camera, V-Log */ - avctx->color_primaries = AVCOL_PRI_V_GAMUT; - avctx->color_trc = AVCOL_TRC_V_LOG; - break; - case MKBETAG('a','t','m','0'): - /* External recording from an Atomos recorder. Cameras universally - * record in their own native log curve internally, but linearize it - * when outputting RAW externally */ - avctx->color_primaries = AVCOL_PRI_UNSPECIFIED; - avctx->color_trc = AVCOL_TRC_LINEAR; - break; - default: - avctx->color_trc = AVCOL_TRC_UNSPECIFIED; - break; - }; + bytestream2_skip(&gb, 4); + avctx->colorspace = AVCOL_SPC_UNSPECIFIED; + avctx->color_primaries = AVCOL_PRI_UNSPECIFIED; + avctx->color_trc = AVCOL_TRC_LINEAR; s->width = bytestream2_get_be16(&gb); s->height = bytestream2_get_be16(&gb); s->coded_width = FFALIGN(s->width, 16); diff --git a/libavcodec/proresdsp.c b/libavcodec/proresdsp.c index eb5dbf4799..cf4aa9d0cf 100644 --- a/libavcodec/proresdsp.c +++ b/libavcodec/proresdsp.c @@ -40,6 +40,14 @@ #define BIT_DEPTH 12 #include "simple_idct_template.c" #undef BIT_DEPTH +#undef IN_IDCT_DEPTH + +/* 32bit iDCT for the ProRes RAW */ +#define IN_IDCT_DEPTH 32 +#define BIT_DEPTH 12 +#include "simple_idct_template.c" +#undef BIT_DEPTH +#undef IN_IDCT_DEPTH /** * Special version of ff_simple_idct_int16_10bit() which does dequantization @@ -74,6 +82,24 @@ static void prores_idct_12(int16_t *restrict block, const int16_t *restrict qmat } } +/* + * 32-bit iDCT for the ProRes RAW + * qmat must be s->qmat[i] * scale + */ +static void prores_idct_bayer_32(int32_t *restrict block, const int16_t *restrict qmat) +{ + for (int i = 0; i < 64; i++) + block[i] = (block[i] * qmat[i]) >> 1; + + for (int i = 0; i < 8; i++) + idctRowCondDC_int32_12bit(block + i*8, 0); + + for (int i = 0; i < 8; i++) { + block[i] += 8192; + idctSparseCol_int32_12bit(block + i); + } +} + #define CLIP_MIN (1 << 2) ///< minimum value for clipping resulting pixels #define CLIP_MAX_10 (1 << 10) - CLIP_MIN - 1 ///< maximum value for clipping resulting pixels #define CLIP_MAX_12 (1 << 12) - CLIP_MIN - 1 ///< maximum value for clipping resulting pixels @@ -99,12 +125,21 @@ static inline void put_pixel(uint16_t *dst, ptrdiff_t linesize, const int16_t *i } } -static inline void put_pixel_bayer_12(uint16_t *dst, ptrdiff_t linesize, - const int16_t *in) +/* Apply the 8-point combined linearization curve (inv. transfer fn + encoder shaping) */ +static inline void put_pixel_bayer_lin_curve_12(uint16_t *dst, ptrdiff_t linesize, + const int32_t *in, const uint16_t *lin_curve) { for (int y = 0; y < 8; y++, dst += linesize) { - for (int x = 0; x < 8; x++) - dst[x*2] = CLIP_12(in[(y << 3) + x]) << 4; + for (int x = 0; x < 8; x++) { + /* Convert the 32-bit input into 16-bits (lrintf(x*16 - 15.5f) = 16) */ + int u = av_clip_uint16(in[(y << 3) + x]*16 - 16); + uint32_t seg = (uint32_t)u >> 13; + uint32_t frac = (uint32_t)u & 0x1FFF; + uint32_t cp0 = lin_curve[seg]; + uint32_t cp1 = seg < 7 ? lin_curve[seg + 1] : 0; + uint32_t o = (cp0 * 8192 + ((cp1 - cp0) & 0xFFFF) * frac + 4096) >> 13; + dst[x*2] = FFMIN(o, 0xFFFF); + } } } @@ -131,10 +166,11 @@ static void prores_idct_put_12_c(uint16_t *out, ptrdiff_t linesize, int16_t *blo } static void prores_idct_put_bayer_12_c(uint16_t *out, ptrdiff_t linesize, - int16_t *block, const int16_t *qmat) + int32_t *block, const int16_t *qmat, + const uint16_t *lin_curve) { - prores_idct_12(block, qmat); - put_pixel_bayer_12(out, linesize << 1, block); + prores_idct_bayer_32(block, qmat); + put_pixel_bayer_lin_curve_12(out, linesize << 1, block, lin_curve); } av_cold void ff_proresdsp_init(ProresDSPContext *dsp, int bits_per_raw_sample) diff --git a/libavcodec/proresdsp.h b/libavcodec/proresdsp.h index f8b57d7e87..75c782fb56 100644 --- a/libavcodec/proresdsp.h +++ b/libavcodec/proresdsp.h @@ -30,7 +30,8 @@ typedef struct ProresDSPContext { int idct_permutation_type; uint8_t idct_permutation[64]; void (*idct_put)(uint16_t *out, ptrdiff_t linesize, int16_t *block, const int16_t *qmat); - void (*idct_put_bayer)(uint16_t *out, ptrdiff_t linesize, int16_t *block, const int16_t *qmat); + void (*idct_put_bayer)(uint16_t *out, ptrdiff_t linesize, int32_t *block, const int16_t *qmat, + const uint16_t *lin_curve); } ProresDSPContext; void ff_proresdsp_init(ProresDSPContext *dsp, int bits_per_raw_sample); diff --git a/libavcodec/vulkan/prores_raw_idct.comp.glsl b/libavcodec/vulkan/prores_raw_idct.comp.glsl index 3393ea3402..dcc8626d48 100644 --- a/libavcodec/vulkan/prores_raw_idct.comp.glsl +++ b/libavcodec/vulkan/prores_raw_idct.comp.glsl @@ -41,6 +41,7 @@ layout (set = 0, binding = 1, scalar) readonly buffer frame_data_buf { layout (push_constant, scalar) uniform pushConstants { u8buf pkt_data; uint8_t qmat[64]; + uint16_t lin_curve[8]; }; #define COMP_ID (gl_LocalInvocationID.z) @@ -67,6 +68,7 @@ const u8vec2 scan[64] = { }; shared uint8_t qmat_buf[64]; +shared uint lin_curve_buf[8]; void main(void) { @@ -75,32 +77,30 @@ void main(void) uint64_t pkt_offset = uint64_t(pkt_data) + td.offset; u8vec2buf hdr_data = u8vec2buf(pkt_offset); - int qscale = pack16(hdr_data[0].v.yx); + int qscale = int(hdr_data[0].v.y); const ivec2 offs = td.pos + ivec2(COMP_ID & 1, COMP_ID >> 1); const uint nb_blocks = 1 << td.log2_nb_blocks; - /* Copy push-constant qmat into shared memory for fast non-uniform access */ - if (gl_LocalInvocationIndex < 64) - qmat_buf[gl_LocalInvocationIndex] = qmat[gl_LocalInvocationIndex]; + if (gl_LocalInvocationIndex == 0) { + [[unroll]] for (uint i = 0; i < 64; i++) qmat_buf[i] = qmat[i]; + [[unroll]] for (uint i = 0; i < 8; i++) lin_curve_buf[i] = uint(lin_curve[i]); + } barrier(); [[unroll]] for (uint y = 0; y < 8; y++) { uint block_off = y*8 + ROW_ID; int v = int(imageLoad(dst, offs + 2*ivec2(BLOCK_ID*8, 0) + scan[block_off])[0]); - float vf = float(sign_extend(v, 16)) / 32768.0; - vf *= qmat_buf[block_off] * qscale; - blocks[BLOCK_ID][COMP_ID*72 + y*9 + ROW_ID] = (vf / (64*4.56)) * - idct_scale[block_off]; + /* Dequantize (coeff * qmat * qscale), matching the reference decoder */ + float vf = float(sign_extend(v, 16)) * float(qmat_buf[block_off]) * float(qscale); + blocks[BLOCK_ID][COMP_ID*72 + y*9 + ROW_ID] = vf * idct_scale[block_off]; } /* Column-wise iDCT */ idct8(BLOCK_ID, COMP_ID*72 + ROW_ID, 9); barrier(); - blocks[BLOCK_ID][COMP_ID*72 + ROW_ID * 9] += 0.5f; - /* Row-wise iDCT */ idct8(BLOCK_ID, COMP_ID*72 + ROW_ID * 9, 1); barrier(); @@ -111,11 +111,22 @@ void main(void) [[unroll]] for (uint y = 0; y < 8; y++) { - int v = int(round(blocks[BLOCK_ID][COMP_ID*72 + y*9 + ROW_ID]*4095.0)); - v = clamp(v, 0, 4095); - v <<= 4; + /* Bias the signed iDCT output into the reference's unsigned 16-bit space */ + int u = clamp(int(round(blocks[BLOCK_ID][COMP_ID*72 + y*9 + ROW_ID])) + 32768, + 0, 65535); + + /* 8-point combined linearization curve (inv. transfer fn + + * encoder-defined shaping). cp1 - cp0 is the segment slope; for the + * final segment cp[8] == 0. */ + uint seg = uint(u) >> 13; + uint frac = uint(u) & 0x1FFFu; + uint cp0 = lin_curve_buf[seg]; + uint cp1 = seg < 7u ? lin_curve_buf[seg + 1u] : 0u; + uint outv = (cp0 * 8192u + ((cp1 - cp0) & 0xFFFFu) * frac + 4096u) >> 13u; + outv = min(outv, 0xFFFFu); + imageStore(dst, offs + 2*ivec2(BLOCK_ID*8 + ROW_ID, y), - ivec4(v)); + ivec4(outv)); } } diff --git a/libavcodec/vulkan_prores_raw.c b/libavcodec/vulkan_prores_raw.c index 953b67d592..b6314ab693 100644 --- a/libavcodec/vulkan_prores_raw.c +++ b/libavcodec/vulkan_prores_raw.c @@ -52,6 +52,7 @@ typedef struct ProResRAWVulkanDecodeContext { typedef struct DecodePushData { VkDeviceAddress pkt_data; uint8_t qmat[64]; + uint16_t lin_curve[8]; } DecodePushData; typedef struct TileData { @@ -232,9 +233,10 @@ static int vk_prores_raw_end_frame(AVCodecContext *avctx) .pkt_data = slices_buf->address, }; memcpy(pd_decode.qmat, prr->qmat, 64); + memcpy(pd_decode.lin_curve, prr->lin_curve, sizeof(pd_decode.lin_curve)); ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(pd_decode) - 64, &pd_decode); + 0, offsetof(DecodePushData, qmat), &pd_decode); vk->CmdDispatch(exec->buf, prr->nb_tw, prr->nb_th, 1); @@ -302,7 +304,7 @@ static int init_decode_shader(AVCodecContext *avctx, FFVulkanContext *s, { int err; - ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData) - 64, + ff_vk_shader_add_push_const(shd, 0, offsetof(DecodePushData, qmat), VK_SHADER_STAGE_COMPUTE_BIT); ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL, (uint32_t []) { 1, 4, 1 }, 0); @@ -338,7 +340,7 @@ static int init_idct_shader(AVCodecContext *avctx, FFVulkanContext *s, }; for (int i = 0; i < 64; i++) SPEC_LIST_ADD(sl, 18 + i, 32, - av_float2int(idct_8_scales[i >> 3]*idct_8_scales[i & 7])); + av_float2int(8*idct_8_scales[i >> 3]*idct_8_scales[i & 7])); ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl, (uint32_t []) { 8, nb_blocks, 4 }, 0); _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
