Hi Vittorio
Sorry for the slow response - this should have been a reply to your reply
to my review of your original patch, but still relevant here:
+++ b/libavcodec/texturedsp.c
>
<snip>
> +
> +static inline void dxt1_block_internal(uint8_t *dst, ptrdiff_t stride,
> + const uint8_t *block, uint8_t
> alpha)
> +{
> + uint32_t tmp, code;
> + uint16_t color0, color1;
> + uint8_t r0, g0, b0, r1, g1, b1;
> + int x, y;
> +
> + color0 = AV_RL16(block);
> + color1 = AV_RL16(block + 2);
> +
> + tmp = (color0 >> 11) * 255 + 16;
> + r0 = (uint8_t) ((tmp / 32 + tmp) / 32);
> + tmp = ((color0 & 0x07E0) >> 5) * 255 + 32;
> + g0 = (uint8_t) ((tmp / 64 + tmp) / 64);
> + tmp = (color0 & 0x001F) * 255 + 16;
> + b0 = (uint8_t) ((tmp / 32 + tmp) / 32);
> +
> + tmp = (color1 >> 11) * 255 + 16;
> + r1 = (uint8_t) ((tmp / 32 + tmp) / 32);
> + tmp = ((color1 & 0x07E0) >> 5) * 255 + 32;
> + g1 = (uint8_t) ((tmp / 64 + tmp) / 64);
> + tmp = (color1 & 0x001F) * 255 + 16;
> + b1 = (uint8_t) ((tmp / 32 + tmp) / 32);
> +
> + code = AV_RL32(block + 4);
> +
> + if (color0 > color1) {
> + for (y = 0; y < 4; y++) {
> + for (x = 0; x < 4; x++) {
> + uint32_t pixel = 0;
> + uint32_t pos_code = (code >> 2 * (x + y * 4)) & 0x03;
> +
> + switch (pos_code) {
> + case 0:
> + pixel = RGBA(r0, g0, b0, 255);
> + break;
> + case 1:
> + pixel = RGBA(r1, g1, b1, 255);
> + break;
> + case 2:
> + pixel = RGBA((2 * r0 + r1) / 3,
> + (2 * g0 + g1) / 3,
> + (2 * b0 + b1) / 3,
> + 255);
> + break;
> + case 3:
> + pixel = RGBA((r0 + 2 * r1) / 3,
> + (g0 + 2 * g1) / 3,
> + (b0 + 2 * b1) / 3,
> + 255);
> + break;
> + }
> +
> + AV_WL32(dst + x * 4 + y * stride, pixel);
> + }
> + }
> + } else {
> + for (y = 0; y < 4; y++) {
> + for (x = 0; x < 4; x++) {
> + uint32_t pixel = 0;
> + uint32_t pos_code = (code >> 2 * (x + y * 4)) & 0x03;
> +
> + switch (pos_code) {
> + case 0:
> + pixel = RGBA(r0, g0, b0, 255);
> + break;
> + case 1:
> + pixel = RGBA(r1, g1, b1, 255);
> + break;
> + case 2:
> + pixel = RGBA((r0 + r1) / 2,
> + (g0 + g1) / 2,
> + (b0 + b1) / 2,
> + 255);
> + break;
> + case 3:
> + pixel = RGBA(0, 0, 0, alpha);
> + break;
> + }
> +
> + AV_WL32(dst + x * 4 + y * stride, pixel);
> + }
> + }
> + }
> +}
>
etc...
For DXT formats you still have a lot of code duplication calculating the
four colours and you asked what my strategy would be. You also calculate
the relevant one of the four colours each time for all sixteen pixels,
whereas I would expect better performance by calculating the four colours
once then fetching using the index for each of the sixteen pixels. I would
suggest something like:
// colors is an array of 4 uint32_t
static void dxt_decode_colors(const uint8_t *block, uint32_t *colors,
unsigned int is_dxt1, uint32_t dxt1_black_value)
{
// calculate colors here using your existing method and store as 4
uint32_t to colors
}
static inline void dxt1_block_internal(uint32_t *dst, ptrdiff_t
stride, const uint8_t *block, uint32_t dxt1_black_value)
{
uint32_t colors[4], indices;
unsigned int x, y;
dxt_decode_colors(block, colors, 1, dxt1_black_value);
indices = AV_RL32(block + 4);
for (y=0; y<4; y++) {
for (x=0; x<4; x++) {
dst[x] = colors[indices&3];
indices >>= 2;
}
dst += stride;
}
}
And so on with appropriate alpha handling for each of the other DXT formats.
Cheers - Tom
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel