On 22/03/15 12:49 PM, Vittorio Giovara wrote:
> +// AAN IDCT

If this isn't already in the tree somewhere and it's generic enough to be
reused, then it should be shared like faanidct and added to idctdsp.
If it's HQ/HQA-specific, it could still be split into a new hqdsp context
to allow for potential optimizations.

> +
> +#define FIX_1_082 17734
> +#define FIX_1_847 30274
> +#define FIX_1_414 23170
> +#define FIX_2_613 21407 // divided by two to fit the range
> +
> +#define IDCTMUL(a, b) ((a) * (b) >> 16)
> +
> +static inline void idct_row(int16_t *blk)
> +{
> +    int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, tmpA;
> +    int tmpB, tmpC, tmpD, tmpE, tmpF, tmp10, tmp11, tmp12, tmp13, tmp14;
> +
> +    tmp0 = blk[5] - blk[3];
> +    tmp1 = blk[5] + blk[3];
> +    tmp2 = blk[1] - blk[7];
> +    tmp3 = blk[1] + blk[7];
> +    tmp4 = tmp3 - tmp1;
> +    tmp5 = IDCTMUL(tmp0 + tmp2, FIX_1_847);
> +    tmp6 = IDCTMUL(tmp2,        FIX_1_082) - tmp5;
> +    tmp7 = tmp5 - IDCTMUL(tmp0, FIX_2_613) * 2;
> +    tmp8 = tmp3 + tmp1;
> +    tmp9 = tmp7 * 4 - tmp8;
> +    tmpA = IDCTMUL(tmp4, FIX_1_414) * 4 - tmp9;
> +    tmpB = tmp6 * 4 + tmpA;
> +    tmpC = blk[2] + blk[6];
> +    tmpD = blk[2] - blk[6];
> +    tmpE = blk[0] - blk[4];
> +    tmpF = blk[0] + blk[4];
> +
> +    tmp10 = IDCTMUL(tmpD, FIX_1_414) * 4 - tmpC;
> +    tmp11 = tmpE - tmp10;
> +    tmp12 = tmpF - tmpC;
> +    tmp13 = tmpE + tmp10;
> +    tmp14 = tmpF + tmpC;
> +
> +    blk[0] = tmp14 + tmp8;
> +    blk[1] = tmp13 + tmp9;
> +    blk[2] = tmp11 + tmpA;
> +    blk[3] = tmp12 - tmpB;
> +    blk[4] = tmp12 + tmpB;
> +    blk[5] = tmp11 - tmpA;
> +    blk[6] = tmp13 - tmp9;
> +    blk[7] = tmp14 - tmp8;
> +}
> +
> +static inline void idct_col(int16_t *blk)
> +{
> +    int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, tmpA;
> +    int tmpB, tmpC, tmpD, tmpE, tmpF, tmp10, tmp11, tmp12, tmp13, tmp14;
> +
> +    tmp0 = blk[5 * 8] - blk[3 * 8];
> +    tmp1 = blk[5 * 8] + blk[3 * 8];
> +    tmp2 = blk[1 * 8] * 2 - (blk[7 * 8] >> 2);
> +    tmp3 = blk[1 * 8] * 2 + (blk[7 * 8] >> 2);
> +    tmp4 = tmp3 - tmp1;
> +    tmp5 = IDCTMUL(tmp0 + tmp2, FIX_1_847);
> +    tmp6 = IDCTMUL(tmp2,        FIX_1_082) - tmp5;
> +    tmp7 = tmp5 - IDCTMUL(tmp0, FIX_2_613) * 2;
> +    tmp8 = (tmp3 + tmp1) >> 1;
> +    tmp9 = tmp7 * 2 - tmp8;
> +    tmpA = IDCTMUL(tmp4, FIX_1_414) * 2 - tmp9;
> +    tmpB = tmp6 * 2 + tmpA;
> +    tmpC =  blk[2 * 8] + (blk[6 * 8] >> 1) >> 1;
> +    tmpD =  blk[2 * 8] - (blk[6 * 8] >> 1);
> +    tmpE = (blk[0 * 8] >> 1) - (blk[4 * 8] >> 1) + 0x2020;
> +    tmpF = (blk[0 * 8] >> 1) + (blk[4 * 8] >> 1) + 0x2020;
> +
> +    tmp10 = IDCTMUL(tmpD, FIX_1_414) * 2 - tmpC;
> +    tmp11 = tmpE - tmp10;
> +    tmp12 = tmpF - tmpC;
> +    tmp13 = tmpE + tmp10;
> +    tmp14 = tmpF + tmpC;
> +
> +    blk[0 * 8] = (tmp14 + tmp8) >> 6;
> +    blk[1 * 8] = (tmp13 + tmp9) >> 6;
> +    blk[2 * 8] = (tmp11 + tmpA) >> 6;
> +    blk[3 * 8] = (tmp12 - tmpB) >> 6;
> +    blk[4 * 8] = (tmp12 + tmpB) >> 6;
> +    blk[5 * 8] = (tmp11 - tmpA) >> 6;
> +    blk[6 * 8] = (tmp13 - tmp9) >> 6;
> +    blk[7 * 8] = (tmp14 - tmp8) >> 6;
> +}
> +
> +static void hq_idct_put(uint8_t *dst, int stride, int16_t *block)
> +{
> +    int i, j;
> +
> +    for (i = 0; i < 8; i++)
> +        idct_row(block + i * 8);
> +    for (i = 0; i < 8; i++)
> +        idct_col(block + i);
> +
> +    // or use IDCTDSPContext.put_pixels_clamped()

Could you benchmark put_pixels_clamped() and see whether it's worth using
here? There's an optimized version for most platforms, after all.

> +    for (i = 0; i < 8; i++) {
> +        for (j = 0; j < 8; j++)
> +            dst[j] = av_clip_uint8(block[j + i * 8]);
> +        dst += stride;
> +    }
> +}
> +
> +static inline void put_blocks(HQContext *c, AVFrame *pic,
> +                              int plane, int x, int y, int ilace,
> +                              int16_t *block0, int16_t *block1)
> +{
> +    uint8_t *p = pic->data[plane] + x;
> +
> +    hq_idct_put(p + y * pic->linesize[plane],
> +                pic->linesize[plane] << ilace, block0);
> +    hq_idct_put(p + (y + (ilace ? 1 : 8)) * pic->linesize[plane],
> +                pic->linesize[plane] << ilace, block1);
> +}

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to