I'll try to do some comments...
> +static const uint8_t zigzag_scan[64] = {
> + 0, 1, 8, 16, 9, 2, 3, 10,
> + 17, 24, 32, 25, 18, 11, 4, 5,
> + 12, 19, 26, 33, 40, 48, 41, 34,
> + 27, 20, 13, 6, 7, 14, 21, 28,
> + 35, 42, 49, 56, 57, 50, 43, 36,
> + 29, 22, 15, 23, 30, 37, 44, 51,
> + 58, 59, 52, 45, 38, 31, 39, 46,
> + 53, 60, 61, 54, 47, 55, 62, 63
> +};
Is this the same as any existing zigzag? If so, use the appropriate
existing array.
> +typedef struct MSS4Context {
> + AVFrame pic;
> +
> + VLC dc_vlc[2], ac_vlc[2];
> + VLC vec_entry_vlc[2];
> + int block[64];
Do the DCT elements have to be 32-bit? If not, make them 16-bit like
other codecs.
> +static av_always_inline int get_coeff_bits(GetBitContext *gb, int nbits)
> +{
> + int val;
> +
> + if (!nbits)
> + return 0;
> +
> + val = get_bits(gb, nbits);
> + if (val < (1 << (nbits - 1)))
> + val -= (1 << nbits) - 1;
Is this a sign thing? This looks like it can be done branchlessly.
> + int val;
> +
> + val = get_vlc2(gb, vlc->table, vlc->bits, 2);
These two lines look mergeable.
> +static int decode_dct(GetBitContext *gb, VLC *dc_vlc, VLC *ac_vlc, int
> *block,
> + int *prev_dc, int prev_dc_stride, int bx, int by,
> + uint16_t *quant_mat)
> +{
> + int skip, val, pos = 1, zz_pos, dc;
> +
> + memset(block, 0, sizeof(*block) * 64);
I suggest using clear_block for this if you convert to int16_t DCT coefficients.
> + int l, tl, t;
> +
> + l = prev_dc[-1];
> + tl = prev_dc[-1 - prev_dc_stride];
> + t = prev_dc[ - prev_dc_stride];
The first line here can probably be merged with the others.
> + if (FFABS(t - tl) <= FFABS(l - tl))
> + dc += l;
> + else
> + dc += t;
This seems weird; we're using the one that's farther away? Okay if
it's right, but maybe a comment explaining this DC prediction scheme?
> + while (pos < 64) {
> + val = get_vlc2(gb, ac_vlc->table, 9, 2);
> + if (!val)
> + return 0;
> + if (val == -1)
> + return -1;
Maybe if( val <= 0 ) return val? Possibly with a comment explaining
how this kills two birds with one stone (0 case, error case?).
> + if (val == 0xF0) {
> + pos += 16;
> + continue;
> + }
> + skip = val >> 4;
> + val = get_coeff_bits(gb, val & 0xF);
> + pos += skip;
> + if (pos >= 64)
> + return -1;
> +
> + zz_pos = zigzag_scan[pos];
> + block[zz_pos] = val * quant_mat[zz_pos];
> + pos++;
> + }
> +
> + return pos == 64 ? 0 : -1;
I don't think it's possible to break from this loop in any other way
-- can this condition ever trigger -1?
> +#define DCT_TEMPLATE(blk, step, SOP, shift) \
> + const int t0 = -39409 * blk[7 * step] - 58980 * blk[1 * step]; \
> + const int t1 = 39410 * blk[1 * step] - 58980 * blk[7 * step]; \
> + const int t2 = -33410 * blk[5 * step] - 167963 * blk[3 * step]; \
> + const int t3 = 33410 * blk[3 * step] - 167963 * blk[5 * step]; \
> + const int t4 = blk[3 * step] + blk[7 * step]; \
> + const int t5 = blk[1 * step] + blk[5 * step]; \
> + const int t6 = 77062 * t4 + 51491 * t5; \
> + const int t7 = 77062 * t5 - 51491 * t4; \
> + const int t8 = 35470 * blk[2 * step] - 85623 * blk[6 * step]; \
> + const int t9 = 35470 * blk[6 * step] + 85623 * blk[2 * step]; \
> + const int tA = SOP(blk[0 * step] - blk[4 * step]); \
> + const int tB = SOP(blk[0 * step] + blk[4 * step]); \
> + \
> + blk[0 * step] = ( t1 + t6 + t9 + tB) >> shift; \
> + blk[1 * step] = ( t3 + t7 + t8 + tA) >> shift; \
> + blk[2 * step] = ( t2 + t6 - t8 + tA) >> shift; \
> + blk[3 * step] = ( t0 + t7 - t9 + tB) >> shift; \
> + blk[4 * step] = (-(t0 + t7) - t9 + tB) >> shift; \
> + blk[5 * step] = (-(t2 + t6) - t8 + tA) >> shift; \
> + blk[6 * step] = (-(t3 + t7) + t8 + tA) >> shift; \
> + blk[7 * step] = (-(t1 + t6) + t9 + tB) >> shift; \
> +
> +#define SOP_ROW(a) ((a) << 16) + 0x2000
> +#define SOP_COL(a) ((a + 32) << 16)
> +
> +static void dct_put(uint8_t *dst, int stride, int *block)
> +{
> + int i, j;
> + int *ptr;
> +
> + ptr = block;
> + for (i = 0; i < 8; i++) {
> + DCT_TEMPLATE(ptr, 1, SOP_ROW, 13);
> + ptr += 8;
> + }
> +
> + ptr = block;
> + for (i = 0; i < 8; i++) {
> + DCT_TEMPLATE(ptr, 8, SOP_COL, 22);
> + ptr++;
> + }
> +
> + ptr = block;
> + for (j = 0; j < 8; j++) {
> + for (i = 0; i < 8; i++)
> + dst[i] = av_clip_uint8(ptr[i] + 128);
> + dst += stride;
> + ptr += 8;
> + }
> +}
> +
> +static void gen_quant_mat(uint16_t *qmat, const uint8_t *ref, float scale)
> +{
> + int i;
> +
> + for (i = 0; i < 64; i++)
> + qmat[i] = (uint16_t)(ref[i] * scale + 50.0) / 100;
> +}
Floating-point in a decoder?! This can't possibly be right, can it?
Even if it's what the reference does, we need to match the actual
behavior, which isn't going to be identical between machines.
> +static void decode_dct_block(MSS4Context *c, GetBitContext *gb,
> + uint8_t *dst[3], int mb_x, int mb_y)
> +{
> + int i, j, k, ret;
> + int *prev_dc;
> + uint8_t *out = dst[0];
> +
> + prev_dc = c->prev_dc[0] + c->prev_dc_stride[0] + mb_x * 2;
> + for (j = 0; j < 2; j++) {
> + for (i = 0; i < 2; i++) {
> + int xpos = mb_x * 2 + i;
> + ret = decode_dct(gb, c->dc_vlc, c->ac_vlc, c->block, prev_dc + i,
> + c->prev_dc_stride[0], xpos, mb_y * 2 + j,
> + c->quant_mat[0]);
> + if (ret) {
> + c->got_error = 1;
> + return;
> + }
> + dct_put(out + xpos * 8, c->pic.linesize[0],
> + c->block);
> + }
> + out += 8 * c->pic.linesize[0];
> + prev_dc += c->prev_dc_stride[0];
> + }
> +
> + for (i = 1; i < 3; i++) {
> + ret = decode_dct(gb, c->dc_vlc + 1, c->ac_vlc + 1,
> + c->block,
> + c->prev_dc[i] + mb_x + c->prev_dc_stride[i],
> + c->prev_dc_stride[i], mb_x, mb_y,
> + c->quant_mat[1]);
> + if (ret) {
> + c->got_error = 1;
> + return;
> + }
Any reason why we have this "got error" thing here and not just
returning an error code?
> + dct_put(c->imgbuf[i], 8, c->block);
> + out = dst[i] + mb_x * 16;
> + for (j = 0; j < 16; j++) {
> + for (k = 0; k < 8; k++)
> + AV_WN16A(out + k * 2, c->imgbuf[i][k + (j & ~1) * 4] *
> 0x101);
> + out += c->pic.linesize[i];
> + }
Explain what's going on here?
> +static int get_value_cached(GetBitContext *gb, int vec_pos, uint8_t *vec,
> + int vec_size, int component, int shift, int
> *prev)
> +{
> + if (vec_pos < vec_size)
> + return vec[vec_pos];
> + if (!get_bits1(gb))
> + return prev[component];
> + prev[component] = get_bits(gb, 8 - shift) << shift;
> + return prev[component];
> +}
> +
> +#define MKVAL(vals) (vals[0] | (vals[1] << 3) | (vals[2] << 6))
> +
> +static void decode_image_block(MSS4Context *ctx, GetBitContext *gb,
> + uint8_t *picdst[3], int mb_x, int mb_y)
> +{
> + uint8_t vec[3][4];
> + int vec_len[3];
> + int sel_len[3], sel_flag[3];
> + int i, j, k, mode, split;
> + int prev_vec1 = 0, prev_split = 0;
> + int vals[3], prev_pix[3];
> + int prev_mode[16];
> + uint8_t *dst[3];
> +
> + const int val_shift = ctx->quality == 100 ? 0 : 2;
> +
> + for (i = 0; i < 3; i++)
> + dst[i] = ctx->imgbuf[i];
> + memset(prev_mode, 0, sizeof(prev_mode));
> + memset(prev_pix, 0, sizeof(prev_pix));
> + memset(vals, 0, sizeof(vals));
> + prev_vec1 = 0;
> +
> + for (i = 0; i < 3; i++) {
> + vec_len[i] = vec_len_syms[!!i][get_unary(gb, 0, 3)];
> + for (j = 0; j < vec_len[i]; j++) {
> + vec[i][j] = get_coeff(gb, &ctx->vec_entry_vlc[!!i]);
> + vec[i][j] += ctx->prev_vec[i][j];
> + ctx->prev_vec[i][j] = vec[i][j];
> + }
> + sel_flag[i] = vec_len[i] > 1;
> + sel_len[i] = vec_len[i] > 2 ? vec_len[i] - 2 : 0;
> + }
> +
> + for (j = 0; j < 16; j++) {
> + if (get_bits1(gb)) {
> + split = 0;
> + if (get_bits1(gb)) {
> + prev_mode[0] = 0;
> + vals[0] = vals[1] = vals[2] = 0;
> + mode = 2;
> + } else {
> + mode = get_bits1(gb);
> + if (mode)
> + split = get_bits(gb, 4);
> + }
> + for (i = 0; i < 16; i++) {
> + if (mode <= 1) {
> + vals[0] = prev_mode[i] & 7;
> + vals[1] = (prev_mode[i] >> 3) & 7;
> + vals[2] = prev_mode[i] >> 6;
> + if (mode == 1 && i == split) {
> + read_vec_pos(gb, vals, sel_flag, sel_len, vals);
> + }
> + } else if (mode == 2) {
> + if (get_bits1(gb))
> + read_vec_pos(gb, vals, sel_flag, sel_len, vals);
> + }
> + for (k = 0; k < 3; k++)
> + *dst[k]++ = get_value_cached(gb, vals[k], vec[k],
> + vec_len[k], k,
> + val_shift, prev_pix);
> + prev_mode[i] = MKVAL(vals);
> + }
> + } else {
> + if (get_bits1(gb)) {
> + split = get_bits(gb, 4);
> + if (split >= prev_split)
> + split++;
> + prev_split = split;
> + } else {
> + split = prev_split;
> + }
> + if (split) {
> + vals[0] = prev_mode[0] & 7;
> + vals[1] = (prev_mode[0] >> 3) & 7;
> + vals[2] = prev_mode[0] >> 6;
> + for (i = 0; i < 3; i++) {
> + for (k = 0; k < split; k++) {
> + *dst[i]++ = get_value_cached(gb, vals[i], vec[i],
> + vec_len[i], i,
> val_shift,
> + prev_pix);
> + prev_mode[k] = MKVAL(vals);
> + }
> + }
> + }
> +
> + if (split != 16) {
> + vals[0] = prev_vec1 & 7;
> + vals[1] = (prev_vec1 >> 3) & 7;
> + vals[2] = prev_vec1 >> 6;
> + if (get_bits1(gb)) {
> + read_vec_pos(gb, vals, sel_flag, sel_len, vals);
> + prev_vec1 = MKVAL(vals);
> + }
> + for (i = 0; i < 3; i++) {
> + for (k = 0; k < 16 - split; k++) {
> + *dst[i]++ = get_value_cached(gb, vals[i], vec[i],
> + vec_len[i], i,
> val_shift,
> + prev_pix);
> + prev_mode[split + k] = MKVAL(vals);
> + }
> + }
> + }
> + }
> + }
This is really confusing code, maybe it could use some comments? I'm
not even sure quite what it does.
> + for (i = 0; i < 3; i++)
> + for (j = 0; j < 16; j++)
> + memcpy(picdst[i] + mb_x * 16 + j * ctx->pic.linesize[i],
> + ctx->imgbuf[i] + j * 16, 16);
Don't we have a dsputil function for this?
> + if (c->quality != quality) {
> + float scale;
> + c->quality = quality;
> + if (quality > 50)
> + scale = 200.0f - 2 * quality;
> + else
> + scale = 5000.0f / quality;
> + for (i = 0; i < 2; i++)
> + gen_quant_mat(c->quant_mat[i],
> + i ? mss4_chroma_quant : mss4_luma_quant,
> + scale);
> + }
Same comment here on float code.
Jason
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel