Hi,
On Wed, Nov 2, 2011 at 8:40 AM, Kostya Shishkov
<[email protected]> wrote:
[..]
> #define AVG_32(dst, src, ref) \
> *((uint32_t *)((dst))) = ((*((uint32_t *)((src))) + *((uint32_t
> *)((ref)))) >> 1) & 0x7F7F7F7F
>
> #define AVG_64(dst, src, ref) \
> *((uint64_t *)((dst))) = ((*((uint64_t *)((src))) + *((uint64_t
> *)((ref)))) >> 1) & 0x7F7F7F7F7F7F7F7F
AV_[WR]N{32,64}A() please.
> #define APPLY_DELTA_4 \
> *((uint16_t *)(dst + line_offset )) = \
> *((uint16_t *)(ref )) + delta_tab->deltas[dyad1];\
> *((uint16_t *)(dst + line_offset + 2)) = \
> *((uint16_t *)(ref + 2)) + delta_tab->deltas[dyad2];\
Same as above here.
> if (mode >= 3) {\
> if (is_top_of_cell && !cell->ypos) {\
> *((uint32_t *)(dst)) = *((uint32_t *)(dst + row_offset));\
AV_COPY32A().
> } else \
> AVG_32(dst, ref, dst + row_offset);\
> }
So ... I wonder if we should limit macros here to two lines or so, and
make everything else an inline function. The reason - again - is the
impossibility of getting nice debug info from macros (e.g. valgrind
line numbers, gdb line numbers, etc.).
> #define APPLY_DELTA_8 \
> /* apply two 32-bit VQ deltas to next even line */\
> if (is_top_of_cell) { \
> *((uint32_t *)(dst + row_offset )) = \
> replicate32(*((uint32_t *)(ref ))) +
> delta_tab->deltas_m10[dyad1];\
> *((uint32_t *)(dst + row_offset + 4)) = \
> replicate32(*((uint32_t *)(ref + 4))) +
> delta_tab->deltas_m10[dyad2];\
> } else { \
> *((uint32_t *)(dst + row_offset )) = \
> *((uint32_t *)(ref )) + delta_tab->deltas_m10[dyad1];\
> *((uint32_t *)(dst + row_offset + 4)) = \
> *((uint32_t *)(ref + 4)) + delta_tab->deltas_m10[dyad2];\
> } \
AV_[WR]N32A().
> /* odd lines are not coded but rather interpolated/replicated */\
> /* first line of the cell on the top of image? - replicate */\
> /* otherwise - interpolate */\
> if (is_top_of_cell && !cell->ypos) {\
> *((uint64_t *)(dst)) = *((uint64_t *)(dst + row_offset));\
AV_COPY64A().
> #define APPLY_DELTA_1011_INTER \
> if (mode == 10) { \
> *((uint32_t *)(dst )) +=
> delta_tab->deltas_m10[dyad1];\
> *((uint32_t *)(dst + 4 )) +=
> delta_tab->deltas_m10[dyad2];\
> *((uint32_t *)(dst + row_offset )) +=
> delta_tab->deltas_m10[dyad1];\
> *((uint32_t *)(dst + row_offset + 4)) +=
> delta_tab->deltas_m10[dyad2];\
> } else { \
> *((uint16_t *)(dst )) += delta_tab->deltas[dyad1];\
> *((uint16_t *)(dst + 2 )) += delta_tab->deltas[dyad2];\
> *((uint16_t *)(dst + row_offset )) += delta_tab->deltas[dyad1];\
> *((uint16_t *)(dst + row_offset + 2)) += delta_tab->deltas[dyad2];\
> }
AV_[WR]N{32,16}A().
And again I'd personally prefer static av_always_inline over macros.
> src32 = (const uint32_t *)src;
> dst32 = (uint32_t *)dst;
>
> /* convert four pixels at once using softSIMD */
> for (x = 0; x < plane->width >> 2; x++)
> *dst32++ = (*src32++ & 0x7F7F7F7F) << 1;
soft-simd? AV_[WR]N32A().
Ronald
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel