Re: [libav-devel] [PATCH] MS Expression Encoder Screen decoder

Jason Garrett-Glaser Sat, 07 Jul 2012 08:26:04 -0700

I'll try to do some comments...

> +static const uint8_t zigzag_scan[64] = {
> +    0,   1,  8, 16,  9,  2,  3, 10,
> +    17, 24, 32, 25, 18, 11,  4,  5,
> +    12, 19, 26, 33, 40, 48, 41, 34,
> +    27, 20, 13,  6,  7, 14, 21, 28,
> +    35, 42, 49, 56, 57, 50, 43, 36,
> +    29, 22, 15, 23, 30, 37, 44, 51,
> +    58, 59, 52, 45, 38, 31, 39, 46,
> +    53, 60, 61, 54, 47, 55, 62, 63
> +};


Is this the same as any existing zigzag?  If so, use the appropriate
existing array.

> +typedef struct MSS4Context {
> +    AVFrame  pic;
> +
> +    VLC      dc_vlc[2], ac_vlc[2];
> +    VLC      vec_entry_vlc[2];
> +    int      block[64];

Do the DCT elements have to be 32-bit?  If not, make them 16-bit like
other codecs.

> +static av_always_inline int get_coeff_bits(GetBitContext *gb, int nbits)
> +{
> +    int val;
> +
> +    if (!nbits)
> +        return 0;
> +
> +    val = get_bits(gb, nbits);
> +    if (val < (1 << (nbits - 1)))
> +        val -= (1 << nbits) - 1;

Is this a sign thing?  This looks like it can be done branchlessly.

> +    int val;
> +
> +    val = get_vlc2(gb, vlc->table, vlc->bits, 2);

These two lines look mergeable.

> +static int decode_dct(GetBitContext *gb, VLC *dc_vlc, VLC *ac_vlc, int 
> *block,
> +                      int *prev_dc, int prev_dc_stride, int bx, int by,
> +                      uint16_t *quant_mat)
> +{
> +    int skip, val, pos = 1, zz_pos, dc;
> +
> +    memset(block, 0, sizeof(*block) * 64);

I suggest using clear_block for this if you convert to int16_t DCT coefficients.

> +            int l, tl, t;
> +
> +            l  = prev_dc[-1];
> +            tl = prev_dc[-1 - prev_dc_stride];
> +            t  = prev_dc[   - prev_dc_stride];

The first line here can probably be merged with the others.

> +            if (FFABS(t - tl) <= FFABS(l - tl))
> +                dc += l;
> +            else
> +                dc += t;

This seems weird; we're using the one that's farther away?  Okay if
it's right, but maybe a comment explaining this DC prediction scheme?

> +    while (pos < 64) {
> +        val = get_vlc2(gb, ac_vlc->table, 9, 2);
> +        if (!val)
> +            return 0;
> +        if (val == -1)
> +            return -1;

Maybe if( val <= 0 ) return val?  Possibly with a comment explaining
how this kills two birds with one stone (0 case, error case?).

> +        if (val == 0xF0) {
> +            pos += 16;
> +            continue;
> +        }
> +        skip = val >> 4;
> +        val  = get_coeff_bits(gb, val & 0xF);
> +        pos += skip;
> +        if (pos >= 64)
> +            return -1;
> +
> +        zz_pos = zigzag_scan[pos];
> +        block[zz_pos] = val * quant_mat[zz_pos];
> +        pos++;
> +    }
> +
> +    return pos == 64 ? 0 : -1;

I don't think it's possible to break from this loop in any other way
-- can this condition ever trigger -1?

> +#define DCT_TEMPLATE(blk, step, SOP, shift)                         \
> +    const int t0 = -39409 * blk[7 * step] -  58980 * blk[1 * step]; \
> +    const int t1 =  39410 * blk[1 * step] -  58980 * blk[7 * step]; \
> +    const int t2 = -33410 * blk[5 * step] - 167963 * blk[3 * step]; \
> +    const int t3 =  33410 * blk[3 * step] - 167963 * blk[5 * step]; \
> +    const int t4 =          blk[3 * step] +          blk[7 * step]; \
> +    const int t5 =          blk[1 * step] +          blk[5 * step]; \
> +    const int t6 =  77062 * t4            +  51491 * t5;            \
> +    const int t7 =  77062 * t5            -  51491 * t4;            \
> +    const int t8 =  35470 * blk[2 * step] -  85623 * blk[6 * step]; \
> +    const int t9 =  35470 * blk[6 * step] +  85623 * blk[2 * step]; \
> +    const int tA = SOP(blk[0 * step] - blk[4 * step]);              \
> +    const int tB = SOP(blk[0 * step] + blk[4 * step]);              \
> +                                                                    \
> +    blk[0 * step] = (  t1 + t6  + t9 + tB) >> shift;                \
> +    blk[1 * step] = (  t3 + t7  + t8 + tA) >> shift;                \
> +    blk[2 * step] = (  t2 + t6  - t8 + tA) >> shift;                \
> +    blk[3 * step] = (  t0 + t7  - t9 + tB) >> shift;                \
> +    blk[4 * step] = (-(t0 + t7) - t9 + tB) >> shift;                \
> +    blk[5 * step] = (-(t2 + t6) - t8 + tA) >> shift;                \
> +    blk[6 * step] = (-(t3 + t7) + t8 + tA) >> shift;                \
> +    blk[7 * step] = (-(t1 + t6) + t9 + tB) >> shift;                \
> +
> +#define SOP_ROW(a) ((a) << 16) + 0x2000
> +#define SOP_COL(a) ((a + 32) << 16)
> +
> +static void dct_put(uint8_t *dst, int stride, int *block)
> +{
> +    int i, j;
> +    int *ptr;
> +
> +    ptr = block;
> +    for (i = 0; i < 8; i++) {
> +        DCT_TEMPLATE(ptr, 1, SOP_ROW, 13);
> +        ptr += 8;
> +    }
> +
> +    ptr = block;
> +    for (i = 0; i < 8; i++) {
> +        DCT_TEMPLATE(ptr, 8, SOP_COL, 22);
> +        ptr++;
> +    }
> +
> +    ptr = block;
> +    for (j = 0; j < 8; j++) {
> +        for (i = 0; i < 8; i++)
> +            dst[i] = av_clip_uint8(ptr[i] + 128);
> +        dst += stride;
> +        ptr += 8;
> +    }
> +}
> +
> +static void gen_quant_mat(uint16_t *qmat, const uint8_t *ref, float scale)
> +{
> +    int i;
> +
> +    for (i = 0; i < 64; i++)
> +        qmat[i] = (uint16_t)(ref[i] * scale + 50.0) / 100;
> +}

Floating-point in a decoder?!  This can't possibly be right, can it?
Even if it's what the reference does, we need to match the actual
behavior, which isn't going to be identical between machines.

> +static void decode_dct_block(MSS4Context *c, GetBitContext *gb,
> +                             uint8_t *dst[3], int mb_x, int mb_y)
> +{
> +    int i, j, k, ret;
> +    int *prev_dc;
> +    uint8_t *out = dst[0];
> +
> +    prev_dc = c->prev_dc[0] + c->prev_dc_stride[0] + mb_x * 2;
> +    for (j = 0; j < 2; j++) {
> +        for (i = 0; i < 2; i++) {
> +            int xpos = mb_x * 2 + i;
> +            ret = decode_dct(gb, c->dc_vlc, c->ac_vlc, c->block, prev_dc + i,
> +                             c->prev_dc_stride[0], xpos, mb_y * 2 + j,
> +                             c->quant_mat[0]);
> +            if (ret) {
> +                c->got_error = 1;
> +                return;
> +            }
> +            dct_put(out + xpos * 8, c->pic.linesize[0],
> +                    c->block);
> +        }
> +        out     += 8 * c->pic.linesize[0];
> +        prev_dc += c->prev_dc_stride[0];
> +    }
> +
> +    for (i = 1; i < 3; i++) {
> +        ret = decode_dct(gb, c->dc_vlc + 1, c->ac_vlc + 1,
> +                         c->block,
> +                         c->prev_dc[i] + mb_x + c->prev_dc_stride[i],
> +                         c->prev_dc_stride[i], mb_x, mb_y,
> +                         c->quant_mat[1]);
> +        if (ret) {
> +            c->got_error = 1;
> +            return;
> +        }

Any reason why we have this "got error" thing here and not just
returning an error code?

> +        dct_put(c->imgbuf[i], 8, c->block);
> +        out = dst[i] + mb_x * 16;
> +        for (j = 0; j < 16; j++) {
> +            for (k = 0; k < 8; k++)
> +                AV_WN16A(out + k * 2, c->imgbuf[i][k + (j & ~1) * 4] * 
> 0x101);
> +            out += c->pic.linesize[i];
> +        }

Explain what's going on here?

> +static int get_value_cached(GetBitContext *gb, int vec_pos, uint8_t *vec,
> +                            int vec_size, int component, int shift, int 
> *prev)
> +{
> +    if (vec_pos < vec_size)
> +        return vec[vec_pos];
> +    if (!get_bits1(gb))
> +        return prev[component];
> +    prev[component] = get_bits(gb, 8 - shift) << shift;
> +    return prev[component];
> +}
> +
> +#define MKVAL(vals)  (vals[0] | (vals[1] << 3) | (vals[2] << 6))
> +
> +static void decode_image_block(MSS4Context *ctx, GetBitContext *gb,
> +                               uint8_t *picdst[3], int mb_x, int mb_y)
> +{
> +    uint8_t vec[3][4];
> +    int     vec_len[3];
> +    int     sel_len[3], sel_flag[3];
> +    int     i, j, k, mode, split;
> +    int     prev_vec1 = 0, prev_split = 0;
> +    int     vals[3], prev_pix[3];
> +    int     prev_mode[16];
> +    uint8_t *dst[3];
> +
> +    const int val_shift = ctx->quality == 100 ? 0 : 2;
> +
> +    for (i = 0; i < 3; i++)
> +        dst[i] = ctx->imgbuf[i];
> +    memset(prev_mode, 0, sizeof(prev_mode));
> +    memset(prev_pix,  0, sizeof(prev_pix));
> +    memset(vals,      0, sizeof(vals));
> +    prev_vec1 = 0;
> +
> +    for (i = 0; i < 3; i++) {
> +        vec_len[i] = vec_len_syms[!!i][get_unary(gb, 0, 3)];
> +        for (j = 0; j < vec_len[i]; j++) {
> +            vec[i][j]  = get_coeff(gb, &ctx->vec_entry_vlc[!!i]);
> +            vec[i][j] += ctx->prev_vec[i][j];
> +            ctx->prev_vec[i][j] = vec[i][j];
> +        }
> +        sel_flag[i] = vec_len[i] > 1;
> +        sel_len[i]  = vec_len[i] > 2 ? vec_len[i] - 2 : 0;
> +    }
> +
> +    for (j = 0; j < 16; j++) {
> +        if (get_bits1(gb)) {
> +            split = 0;
> +            if (get_bits1(gb)) {
> +                prev_mode[0] = 0;
> +                vals[0] = vals[1] = vals[2] = 0;
> +                mode = 2;
> +            } else {
> +                mode = get_bits1(gb);
> +                if (mode)
> +                    split = get_bits(gb, 4);
> +            }
> +            for (i = 0; i < 16; i++) {
> +                if (mode <= 1) {
> +                    vals[0] =  prev_mode[i]       & 7;
> +                    vals[1] = (prev_mode[i] >> 3) & 7;
> +                    vals[2] =  prev_mode[i] >> 6;
> +                    if (mode == 1 && i == split) {
> +                        read_vec_pos(gb, vals, sel_flag, sel_len, vals);
> +                    }
> +                } else if (mode == 2) {
> +                    if (get_bits1(gb))
> +                        read_vec_pos(gb, vals, sel_flag, sel_len, vals);
> +                }
> +                for (k = 0; k < 3; k++)
> +                    *dst[k]++ = get_value_cached(gb, vals[k], vec[k],
> +                                                 vec_len[k], k,
> +                                                 val_shift, prev_pix);
> +                prev_mode[i] = MKVAL(vals);
> +            }
> +        } else {
> +            if (get_bits1(gb)) {
> +                split = get_bits(gb, 4);
> +                if (split >= prev_split)
> +                    split++;
> +                prev_split = split;
> +            } else {
> +                split = prev_split;
> +            }
> +            if (split) {
> +                vals[0] =  prev_mode[0]       & 7;
> +                vals[1] = (prev_mode[0] >> 3) & 7;
> +                vals[2] =  prev_mode[0] >> 6;
> +                for (i = 0; i < 3; i++) {
> +                    for (k = 0; k < split; k++) {
> +                        *dst[i]++ = get_value_cached(gb, vals[i], vec[i],
> +                                                     vec_len[i], i, 
> val_shift,
> +                                                     prev_pix);
> +                        prev_mode[k] = MKVAL(vals);
> +                    }
> +                }
> +            }
> +
> +            if (split != 16) {
> +                vals[0] =  prev_vec1       & 7;
> +                vals[1] = (prev_vec1 >> 3) & 7;
> +                vals[2] =  prev_vec1 >> 6;
> +                if (get_bits1(gb)) {
> +                    read_vec_pos(gb, vals, sel_flag, sel_len, vals);
> +                    prev_vec1 = MKVAL(vals);
> +                }
> +                for (i = 0; i < 3; i++) {
> +                    for (k = 0; k < 16 - split; k++) {
> +                        *dst[i]++ = get_value_cached(gb, vals[i], vec[i],
> +                                                     vec_len[i], i, 
> val_shift,
> +                                                     prev_pix);
> +                        prev_mode[split + k] = MKVAL(vals);
> +                    }
> +                }
> +            }
> +        }
> +    }

This is really confusing code, maybe it could use some comments?  I'm
not even sure quite what it does.

> +    for (i = 0; i < 3; i++)
> +        for (j = 0; j < 16; j++)
> +            memcpy(picdst[i] + mb_x * 16 + j * ctx->pic.linesize[i],
> +                   ctx->imgbuf[i] + j * 16, 16);

Don't we have a dsputil function for this?

> +    if (c->quality != quality) {
> +        float scale;
> +        c->quality = quality;
> +        if (quality > 50)
> +            scale = 200.0f - 2 * quality;
> +        else
> +            scale = 5000.0f / quality;
> +        for (i = 0; i < 2; i++)
> +            gen_quant_mat(c->quant_mat[i],
> +                          i ? mss4_chroma_quant : mss4_luma_quant,
> +                          scale);
> +    }

Same comment here on float code.

Jason
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] MS Expression Encoder Screen decoder

Reply via email to