[libav-devel] [PATCH 2/5] Introduce a TextureDSP module

Vittorio Giovara Tue, 16 Jun 2015 15:39:35 -0700

This module implements generic texture decompression for different
families (DXTC, RGTC, BCn) and texture compression for DXTC 1, 3, and 5.


Signed-off-by: Vittorio Giovara <[email protected]>
---
We found differing hashes on rgtc decoding functions (off by one) due to the
use of floats. This version rewrites those parts using integer maths only.

Many thanks to Luca and Martin for helping me debug this issue.
Vittorio

 configure                  |   2 +
 libavcodec/Makefile        |   2 +
 libavcodec/texturedsp.c    | 610 +++++++++++++++++++++++++++++++++++++++++
 libavcodec/texturedsp.h    |  64 +++++
 libavcodec/texturedspenc.c | 659 +++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 1337 insertions(+)
 create mode 100644 libavcodec/texturedsp.c
 create mode 100644 libavcodec/texturedsp.h
 create mode 100644 libavcodec/texturedspenc.c

diff --git a/configure b/configure
index 09c2de0..6e3ea64 100755
--- a/configure
+++ b/configure
@@ -1626,6 +1626,8 @@ CONFIG_EXTRA="
     sinewin
     snappy
     startcode
+    texturedsp
+    texturedspenc
     tpeldsp
     videodsp
     vp3dsp
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 6e04ca6..1d34e2f 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -93,6 +93,8 @@ OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
 OBJS-$(CONFIG_SINEWIN)                 += sinewin.o
 OBJS-$(CONFIG_SNAPPY)                  += snappy.o
 OBJS-$(CONFIG_STARTCODE)               += startcode.o
+OBJS-$(CONFIG_TEXTUREDSP)              += texturedsp.o
+OBJS-$(CONFIG_TEXTUREDSPENC)           += texturedspenc.o
 OBJS-$(CONFIG_TPELDSP)                 += tpeldsp.o
 OBJS-$(CONFIG_VIDEODSP)                += videodsp.o
 OBJS-$(CONFIG_VP3DSP)                  += vp3dsp.o
diff --git a/libavcodec/texturedsp.c b/libavcodec/texturedsp.c
new file mode 100644
index 0000000..78eb6fa
--- /dev/null
+++ b/libavcodec/texturedsp.c
@@ -0,0 +1,610 @@
+/*
+ * Texture block decompression
+ * Copyright (C) 2009 Benjamin Dobell, Glass Echidna
+ * Copyright (C) 2012 Matthäus G. "Anteru" Chajdas (http://anteru.net)
+ * Copyright (C) 2015 Vittorio Giovara <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+
+#include "texturedsp.h"
+
+#define RGBA(r, g, b, a) ((r) | ((g) << 8) | ((b) << 16) | ((unsigned) (a) << 24))
+
+/* Build the 4-entry RGBA palette from two RGB565 endpoints. 'dxtn' forces
+ * four colors, alpha 0; in 3-color mode colors[3] is black with 'alpha'. */
+static av_always_inline void extract_color(uint32_t colors[4],
+                                           uint16_t color0,
+                                           uint16_t color1,
+                                           int dxtn, int alpha)
+{
+    int v;
+    uint8_t r0, g0, b0, r1, g1, b1;
+    uint8_t opacity = dxtn ? 0 : 255;
+
+    v  = (color0 >> 11) * 255 + 16;
+    r0 = (uint8_t) ((v + v / 32) / 32);
+    v  = ((color0 >> 5) & 0x3F) * 255 + 32;
+    g0 = (uint8_t) ((v + v / 64) / 64);
+    v  = (color0 & 0x1F) * 255 + 16;
+    b0 = (uint8_t) ((v + v / 32) / 32);
+
+    v  = (color1 >> 11) * 255 + 16;
+    r1 = (uint8_t) ((v + v / 32) / 32);
+    v  = ((color1 >> 5) & 0x3F) * 255 + 32;
+    g1 = (uint8_t) ((v + v / 64) / 64);
+    v  = (color1 & 0x1F) * 255 + 16;
+    b1 = (uint8_t) ((v + v / 32) / 32);
+
+    colors[0] = RGBA(r0, g0, b0, opacity);
+    colors[1] = RGBA(r1, g1, b1, opacity);
+    if (dxtn || color0 > color1) {
+        colors[2] = RGBA((2 * r0 + r1) / 3,
+                         (2 * g0 + g1) / 3,
+                         (2 * b0 + b1) / 3,
+                         opacity);
+        colors[3] = RGBA((2 * r1 + r0) / 3,
+                         (2 * g1 + g0) / 3,
+                         (2 * b1 + b0) / 3,
+                         opacity);
+    } else {
+        colors[2] = RGBA((r0 + r1) / 2,
+                         (g0 + g1) / 2,
+                         (b0 + b1) / 2,
+                         opacity);
+        colors[3] = RGBA(0, 0, 0, alpha);
+    }
+}
+
+static inline void dxt1_block_internal(uint8_t *dst, ptrdiff_t stride,
+                                       const uint8_t *block, uint8_t alpha)
+{
+    int x, y;
+    uint32_t colors[4];
+    uint16_t color0 = AV_RL16(block + 0);
+    uint16_t color1 = AV_RL16(block + 2);
+    uint32_t code   = AV_RL32(block + 4);
+
+    extract_color(colors, color0, color1, 0, alpha);
+
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            uint32_t pixel = colors[code & 3];
+            code >>= 2;
+            AV_WL32(dst + x * 4, pixel);
+        }
+        dst += stride;
+    }
+}
+
+/**
+ * Decompress one block of a DXT1 texture and store the resulting
+ * RGBA pixels in 'dst'. Alpha component is fully opaque.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed.
+ */
+static int dxt1_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    dxt1_block_internal(dst, stride, block, 255);
+
+    return 8;
+}
+
+/**
+ * Decompress one block of a DXT1 with 1-bit alpha texture and store
+ * the resulting RGBA pixels in 'dst'. Alpha is either fully opaque or
+ * fully transparent.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed.
+ */
+static int dxt1a_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    dxt1_block_internal(dst, stride, block, 0);
+
+    return 8;
+}
+
+/*
+ * Decode one 16-byte DXT3 block into RGBA at 'dst': 4-bit explicit alpha
+ * per pixel (8 bytes), followed by a DXT1-style color block at offset 8.
+ */
+static inline void dxt3_block_internal(uint8_t *dst, ptrdiff_t stride,
+                                       const uint8_t *block)
+{
+    int x, y;
+    uint32_t colors[4];
+    uint16_t color0 = AV_RL16(block +  8);
+    uint16_t color1 = AV_RL16(block + 10);
+    uint32_t code   = AV_RL32(block + 12);
+
+    extract_color(colors, color0, color1, 1, 0);
+
+    for (y = 0; y < 4; y++) {
+        const uint16_t alpha_code = AV_RL16(block + 2 * y);
+
+        for (x = 0; x < 4; x++) {
+            /* Expand the 4-bit alpha to 8 bits (0x0..0xF -> 0..255). */
+            uint8_t alpha = ((alpha_code >> (4 * x)) & 0x0F) * 17;
+            /* Shift as unsigned: alpha >= 128 would overflow a signed int. */
+            uint32_t pixel = colors[code & 3] | ((unsigned) alpha << 24);
+            code >>= 2;
+
+            AV_WL32(dst + x * 4, pixel);
+        }
+        dst += stride;
+    }
+}
+
+/** Convert a premultiplied alpha pixel to a straight alpha pixel. */
+static av_always_inline void premult2straight(uint8_t *src)
+{
+    int r = src[0];
+    int g = src[1];
+    int b = src[2];
+    int a = src[3]; /* unchanged */
+    /* NOTE(review): channels are scaled by a / 255 -- confirm direction. */
+    src[0] = (uint8_t) r * a / 255;
+    src[1] = (uint8_t) g * a / 255;
+    src[2] = (uint8_t) b * a / 255;
+}
+
+/**
+ * Decompress one block of a DXT2 texture and store the resulting
+ * RGBA pixels in 'dst'.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed.
+ */
+static int dxt2_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    int x, y;
+
+    dxt3_block_internal(dst, stride, block);
+
+    /* This format is DXT3, but returns premultiplied alpha. It needs to be
+     * converted because it's what lavc outputs (and swscale expects). */
+    for (y = 0; y < 4; y++)
+        for (x = 0; x < 4; x++)
+            premult2straight(dst + x * 4 + y * stride);
+
+    return 16;
+}
+
+/**
+ * Decompress one block of a DXT3 texture and store the resulting
+ * RGBA pixels in 'dst'.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed.
+ */
+static int dxt3_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    dxt3_block_internal(dst, stride, block);
+
+    return 16;
+}
+
+/**
+ * Decompress a BC 16x3 index block stored as
+ *   h g f e
+ *   d c b a
+ *   p o n m
+ *   l k j i
+ *
+ * Bits packed as
+ *  | h | g | f | e | d | c | b | a | // Entry
+ *  |765 432 107 654 321 076 543 210| // Bit
+ *  |0000000000111111111112222222222| // Byte
+ *
+ * into 16 8-bit indices.
+ */
+static void decompress_indices(uint8_t *dst, const uint8_t *src)
+{
+    int block, i;
+
+    for (block = 0; block < 2; block++) {
+        int tmp = AV_RL24(src);
+
+        /* Unpack eight 3-bit indices from the current 3-byte group */
+        for (i = 0; i < 8; i++)
+            dst[i] = (tmp >> (i * 3)) & 0x7;
+
+        src += 3;
+        dst += 8;
+    }
+}
+
+/* Decode one 16-byte DXT5 block into RGBA at 'dst': two alpha endpoints and
+ * 16 3-bit alpha indices, followed by a DXT1-style color block at offset 8. */
+static inline void dxt5_block_internal(uint8_t *dst, ptrdiff_t stride,
+                                       const uint8_t *block)
+{
+    int x, y;
+    uint32_t colors[4];
+    uint8_t alpha_indices[16];
+    uint16_t color0 = AV_RL16(block + 8);
+    uint16_t color1 = AV_RL16(block + 10);
+    uint32_t code   = AV_RL32(block + 12);
+    uint8_t alpha0  = *(block);
+    uint8_t alpha1  = *(block + 1);
+
+    decompress_indices(alpha_indices, block + 2);
+
+    extract_color(colors, color0, color1, 1, 0);
+
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            int alpha_code = alpha_indices[x + y * 4];
+            uint32_t pixel;
+            uint8_t alpha;
+
+            /* Codes 0/1 are the endpoints; the rest depend on their order. */
+            if (alpha_code == 0) {
+                alpha = alpha0;
+            } else if (alpha_code == 1) {
+                alpha = alpha1;
+            } else if (alpha0 > alpha1) {
+                alpha = (uint8_t) (((8 - alpha_code) * alpha0 +
+                                    (alpha_code - 1) * alpha1) / 7);
+            } else if (alpha_code == 6) {
+                alpha = 0;
+            } else if (alpha_code == 7) {
+                alpha = 255;
+            } else {
+                alpha = (uint8_t) (((6 - alpha_code) * alpha0 +
+                                    (alpha_code - 1) * alpha1) / 5);
+            }
+            /* Shift as unsigned: alpha >= 128 would overflow a signed int. */
+            pixel = colors[code & 3] | ((unsigned) alpha << 24);
+            code >>= 2;
+            AV_WL32(dst + x * 4, pixel);
+        }
+        dst += stride;
+    }
+}
+
+/**
+ * Decompress one block of a DXT4 texture and store the resulting
+ * RGBA pixels in 'dst'.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed.
+ */
+static int dxt4_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    int x, y;
+
+    dxt5_block_internal(dst, stride, block);
+
+    /* This format is DXT5, but returns premultiplied alpha. It needs to be
+     * converted because it's what lavc outputs (and swscale expects). */
+    for (y = 0; y < 4; y++)
+        for (x = 0; x < 4; x++)
+            premult2straight(dst + x * 4 + y * stride);
+
+    return 16;
+}
+
+/**
+ * Decompress one block of a DXT5 texture and store the resulting
+ * RGBA pixels in 'dst'.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed.
+ */
+static int dxt5_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    dxt5_block_internal(dst, stride, block);
+
+    return 16;
+}
+
+/**
+ * Convert a YCoCg buffer to RGBA.
+ *
+ * @param src    input buffer.
+ * @param scaled variant with scaled chroma components and opaque alpha.
+ */
+static av_always_inline void ycocg2rgba(uint8_t *src, int scaled)
+{
+    int r = src[0];
+    int g = src[1];
+    int b = src[2];
+    int a = src[3];
+
+    int s  = scaled ? (b >> 3) + 1 : 1;
+    int y  = a;
+    int co = (r - 128) / s;
+    int cg = (g - 128) / s;
+
+    src[0] = av_clip_uint8(y + co - cg);
+    src[1] = av_clip_uint8(y + cg);
+    src[2] = av_clip_uint8(y - co - cg);
+    src[3] = scaled ? 255 : b;
+}
+
+/**
+ * Decompress one block of a DXT5 texture with classic YCoCg and store
+ * the resulting RGBA pixels in 'dst'. Alpha is the decoded blue component.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed.
+ */
+static int dxt5y_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    int x, y;
+
+    /* This format is basically DXT5, with luma stored in alpha.
+     * Run a normal decompress and then reorder the components. */
+    dxt5_block_internal(dst, stride, block);
+
+    for (y = 0; y < 4; y++)
+        for (x = 0; x < 4; x++)
+            ycocg2rgba(dst + x * 4 + y * stride, 0);
+
+    return 16;
+}
+
+/**
+ * Decompress one block of a DXT5 texture with scaled YCoCg and store
+ * the resulting RGBA pixels in 'dst'. Alpha component is fully opaque.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed.
+ */
+static int dxt5ys_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    int x, y;
+
+    /* This format is basically DXT5, with luma stored in alpha.
+     * Run a normal decompress and then reorder the components. */
+    dxt5_block_internal(dst, stride, block);
+
+    for (y = 0; y < 4; y++)
+        for (x = 0; x < 4; x++)
+            ycocg2rgba(dst + x * 4 + y * stride, 1);
+
+    return 16;
+}
+
+static inline void rgtc_block_internal(uint8_t *dst, ptrdiff_t stride,
+                                       const uint8_t *block,
+                                       const int *color_tab)
+{
+    uint8_t indices[16];
+    int x, y;
+
+    decompress_indices(indices, block + 2);
+
+    /* Only one or two channels are stored at most, since it is only used to
+     * compress specular (black and white) or normal (red and green) maps.
+     * Although the standard says to zero out unused components, many
+     * implementations fill all of them with the same value. */
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            int i = indices[x + y * 4];
+            /* Interval expansion from [-1 1] or [0 1] to [0 255]. */
+            int c = color_tab[i];
+            uint32_t pixel = RGBA(c, c, c, 255);
+            AV_WL32(dst + x * 4 + y * stride, pixel);
+        }
+    }
+}
+
+static inline void rgtc1_block_internal(uint8_t *dst, ptrdiff_t stride,
+                                        const uint8_t *block, int sign)
+{
+    int color_table[8];
+    int r0, r1;
+
+    if (sign) {
+        /* signed data is in [-128 127] so just offset it to unsigned
+         * and it can be treated exactly the same */
+        r0 = ((int8_t) block[0]) + 128;
+        r1 = ((int8_t) block[1]) + 128;
+    } else {
+        r0 = block[0];
+        r1 = block[1];
+    }
+
+    color_table[0] = r0;
+    color_table[1] = r1;
+
+    if (r0 > r1) {
+        /* 6 interpolated color values */
+        color_table[2] = (6 * r0 + 1 * r1) / 7; // bit code 010
+        color_table[3] = (5 * r0 + 2 * r1) / 7; // bit code 011
+        color_table[4] = (4 * r0 + 3 * r1) / 7; // bit code 100
+        color_table[5] = (3 * r0 + 4 * r1) / 7; // bit code 101
+        color_table[6] = (2 * r0 + 5 * r1) / 7; // bit code 110
+        color_table[7] = (1 * r0 + 6 * r1) / 7; // bit code 111
+    } else {
+        /* 4 interpolated color values */
+        color_table[2] = (4 * r0 + 1 * r1) / 5; // bit code 010
+        color_table[3] = (3 * r0 + 2 * r1) / 5; // bit code 011
+        color_table[4] = (2 * r0 + 3 * r1) / 5; // bit code 100
+        color_table[5] = (1 * r0 + 4 * r1) / 5; // bit code 101
+        color_table[6] = 0;    /* min range */  // bit code 110
+        color_table[7] = 255;  /* max range */  // bit code 111
+    }
+
+    rgtc_block_internal(dst, stride, block, color_table);
+}
+
+/**
+ * Decompress one block of a RGTC1 texture with signed components
+ * and store the resulting RGBA pixels in 'dst'.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed.
+ */
+static int rgtc1s_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    rgtc1_block_internal(dst, stride, block, 1);
+
+    return 8;
+}
+
+/**
+ * Decompress one block of a RGTC1 texture with unsigned components
+ * and store the resulting RGBA pixels in 'dst'.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed.
+ */
+static int rgtc1u_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    rgtc1_block_internal(dst, stride, block, 0);
+
+    return 8;
+}
+
+static inline void rgtc2_block_internal(uint8_t *dst, ptrdiff_t stride,
+                                        const uint8_t *block, int sign)
+{
+    /* 4x4 block containing 4 component pixels. */
+    uint8_t c0[4 * 4 * 4];
+    uint8_t c1[4 * 4 * 4];
+    int x, y;
+
+    /* Decompress the two channels separately and interleave them afterwards. */
+    rgtc1_block_internal(c0, 16, block, sign);
+    rgtc1_block_internal(c1, 16, block + 8, sign);
+
+    /* B is rebuilt exactly like a normal map. */
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            uint8_t *p = dst + x * 4 + y * stride;
+            int r = c0[x * 4 + y * 16];
+            int g = c1[x * 4 + y * 16];
+            int b = 127;
+
+            int d = (255 * 255 - r * r - g * g) / 2;
+            if (d > 0)
+                b = rint(sqrtf(d));
+
+            p[0] = r;
+            p[1] = g;
+            p[2] = b;
+            p[3] = 255;
+        }
+    }
+}
+
+/**
+ * Decompress one block of a RGTC2 texture with signed components
+ * and store the resulting RGBA pixels in 'dst'. Alpha is fully opaque.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed.
+ */
+static int rgtc2s_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    rgtc2_block_internal(dst, stride, block, 1);
+
+    return 16;
+}
+
+/**
+ * Decompress one block of a RGTC2 texture with unsigned components
+ * and store the resulting RGBA pixels in 'dst'. Alpha is fully opaque.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed.
+ */
+static int rgtc2u_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    rgtc2_block_internal(dst, stride, block, 0);
+
+    return 16;
+}
+
+/**
+ * Decompress one block of a 3Dc texture with unsigned components
+ * and store the resulting RGBA pixels in 'dst'. Alpha is fully opaque.
+ *
+ * @param dst    output buffer.
+ * @param stride scanline in bytes.
+ * @param block  block to decompress.
+ * @return how much texture data has been consumed.
+ */
+static int dxn3dc_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    int x, y;
+    rgtc2_block_internal(dst, stride, block, 0);
+
+    /* This is the 3Dc variant of RGTC2, with swapped R and G. */
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            uint8_t *p = dst + x * 4 + y * stride;
+            FFSWAP(uint8_t, p[0], p[1]);
+        }
+    }
+
+    return 16;
+}
+
+av_cold void ff_texturedsp_init(TextureDSPContext *c)
+{
+    c->dxt1_block   = dxt1_block;
+    c->dxt1a_block  = dxt1a_block;
+    c->dxt2_block   = dxt2_block;
+    c->dxt3_block   = dxt3_block;
+    c->dxt4_block   = dxt4_block;
+    c->dxt5_block   = dxt5_block;
+    c->dxt5y_block  = dxt5y_block;
+    c->dxt5ys_block = dxt5ys_block;
+    c->rgtc1s_block = rgtc1s_block;
+    c->rgtc1u_block = rgtc1u_block;
+    c->rgtc2s_block = rgtc2s_block;
+    c->rgtc2u_block = rgtc2u_block;
+    c->dxn3dc_block = dxn3dc_block;
+}
diff --git a/libavcodec/texturedsp.h b/libavcodec/texturedsp.h
new file mode 100644
index 0000000..fcbe7a4
--- /dev/null
+++ b/libavcodec/texturedsp.h
@@ -0,0 +1,64 @@
+/*
+ * Texture block module
+ * Copyright (C) 2015 Vittorio Giovara <[email protected]>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Texture block (4x4) module
+ *
+ * References:
+ *   https://www.opengl.org/wiki/S3_Texture_Compression
+ *   https://www.opengl.org/wiki/Red_Green_Texture_Compression
+ *   https://msdn.microsoft.com/en-us/library/bb694531%28v=vs.85%29.aspx
+ *
+ * All functions return how much data has been written or read.
+ *
+ * Pixel input or output format is always AV_PIX_FMT_RGBA.
+ */
+
+#ifndef AVCODEC_TEXTUREDSP_H
+#define AVCODEC_TEXTUREDSP_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define TEXTURE_BLOCK_W 4
+#define TEXTURE_BLOCK_H 4
+
+typedef struct TextureDSPContext {
+    int (*dxt1_block)  (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt1a_block) (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt2_block)  (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt3_block)  (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt4_block)  (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt5_block)  (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt5y_block) (uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxt5ys_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*rgtc1s_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*rgtc1u_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*rgtc2s_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*rgtc2u_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*dxn3dc_block)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+} TextureDSPContext;
+
+void ff_texturedsp_init(TextureDSPContext *c);
+void ff_texturedspenc_init(TextureDSPContext *c);
+
+#endif /* AVCODEC_TEXTUREDSP_H */
diff --git a/libavcodec/texturedspenc.c b/libavcodec/texturedspenc.c
new file mode 100644
index 0000000..4a387c5
--- /dev/null
+++ b/libavcodec/texturedspenc.c
@@ -0,0 +1,659 @@
+/*
+ * Texture block compression
+ * Copyright (C) 2015 Vittorio Giovara <[email protected]>
+ * Based on public domain code by Fabian Giesen, Sean Barrett and Yann Collet.
+ *
+ * This file is part of Libav
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+
+#include "texturedsp.h"
+
+/* Expansion of a 5-bit channel value to 8 bits by bit replication:
+ * expand5[i] == (i << 3) | (i >> 2). */
+static const uint8_t expand5[32] = {
+      0,   8,  16,  24,  33,  41,  49,  57,  66,  74,  82,  90,
+     99, 107, 115, 123, 132, 140, 148, 156, 165, 173, 181, 189,
+    198, 206, 214, 222, 231, 239, 247, 255,
+};
+
+/* Expansion of a 6-bit channel value to 8 bits by bit replication:
+ * expand6[i] == (i << 2) | (i >> 4). */
+static const uint8_t expand6[64] = {
+      0,   4,   8,  12,  16,  20,  24,  28,  32,  36,  40,  44,
+     48,  52,  56,  60,  65,  69,  73,  77,  81,  85,  89,  93,
+     97, 101, 105, 109, 113, 117, 121, 125, 130, 134, 138, 142,
+    146, 150, 154, 158, 162, 166, 170, 174, 178, 182, 186, 190,
+    195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239,
+    243, 247, 251, 255,
+};
+
+/* Single-color endpoint table for the 5-bit channels (red and blue).
+ * Indexed by an 8-bit channel value; entry [v][0] goes into the "max"
+ * endpoint and entry [v][1] into the "min" endpoint.
+ * NOTE(review): presumably the pair whose 1/3 interpolation best
+ * approximates v, as in the public domain code this file is based on. */
+static const uint8_t match5[256][2] = {
+    {  0,  0 }, {  0,  0 }, {  0,  1 }, {  0,  1 }, {  1,  0 }, {  1,  0 },
+    {  1,  0 }, {  1,  1 }, {  1,  1 }, {  2,  0 }, {  2,  0 }, {  0,  4 },
+    {  2,  1 }, {  2,  1 }, {  2,  1 }, {  3,  0 }, {  3,  0 }, {  3,  0 },
+    {  3,  1 }, {  1,  5 }, {  3,  2 }, {  3,  2 }, {  4,  0 }, {  4,  0 },
+    {  4,  1 }, {  4,  1 }, {  4,  2 }, {  4,  2 }, {  4,  2 }, {  3,  5 },
+    {  5,  1 }, {  5,  1 }, {  5,  2 }, {  4,  4 }, {  5,  3 }, {  5,  3 },
+    {  5,  3 }, {  6,  2 }, {  6,  2 }, {  6,  2 }, {  6,  3 }, {  5,  5 },
+    {  6,  4 }, {  6,  4 }, {  4,  8 }, {  7,  3 }, {  7,  3 }, {  7,  3 },
+    {  7,  4 }, {  7,  4 }, {  7,  4 }, {  7,  5 }, {  5,  9 }, {  7,  6 },
+    {  7,  6 }, {  8,  4 }, {  8,  4 }, {  8,  5 }, {  8,  5 }, {  8,  6 },
+    {  8,  6 }, {  8,  6 }, {  7,  9 }, {  9,  5 }, {  9,  5 }, {  9,  6 },
+    {  8,  8 }, {  9,  7 }, {  9,  7 }, {  9,  7 }, { 10,  6 }, { 10,  6 },
+    { 10,  6 }, { 10,  7 }, {  9,  9 }, { 10,  8 }, { 10,  8 }, {  8, 12 },
+    { 11,  7 }, { 11,  7 }, { 11,  7 }, { 11,  8 }, { 11,  8 }, { 11,  8 },
+    { 11,  9 }, {  9, 13 }, { 11, 10 }, { 11, 10 }, { 12,  8 }, { 12,  8 },
+    { 12,  9 }, { 12,  9 }, { 12, 10 }, { 12, 10 }, { 12, 10 }, { 11, 13 },
+    { 13,  9 }, { 13,  9 }, { 13, 10 }, { 12, 12 }, { 13, 11 }, { 13, 11 },
+    { 13, 11 }, { 14, 10 }, { 14, 10 }, { 14, 10 }, { 14, 11 }, { 13, 13 },
+    { 14, 12 }, { 14, 12 }, { 12, 16 }, { 15, 11 }, { 15, 11 }, { 15, 11 },
+    { 15, 12 }, { 15, 12 }, { 15, 12 }, { 15, 13 }, { 13, 17 }, { 15, 14 },
+    { 15, 14 }, { 16, 12 }, { 16, 12 }, { 16, 13 }, { 16, 13 }, { 16, 14 },
+    { 16, 14 }, { 16, 14 }, { 15, 17 }, { 17, 13 }, { 17, 13 }, { 17, 14 },
+    { 16, 16 }, { 17, 15 }, { 17, 15 }, { 17, 15 }, { 18, 14 }, { 18, 14 },
+    { 18, 14 }, { 18, 15 }, { 17, 17 }, { 18, 16 }, { 18, 16 }, { 16, 20 },
+    { 19, 15 }, { 19, 15 }, { 19, 15 }, { 19, 16 }, { 19, 16 }, { 19, 16 },
+    { 19, 17 }, { 17, 21 }, { 19, 18 }, { 19, 18 }, { 20, 16 }, { 20, 16 },
+    { 20, 17 }, { 20, 17 }, { 20, 18 }, { 20, 18 }, { 20, 18 }, { 19, 21 },
+    { 21, 17 }, { 21, 17 }, { 21, 18 }, { 20, 20 }, { 21, 19 }, { 21, 19 },
+    { 21, 19 }, { 22, 18 }, { 22, 18 }, { 22, 18 }, { 22, 19 }, { 21, 21 },
+    { 22, 20 }, { 22, 20 }, { 20, 24 }, { 23, 19 }, { 23, 19 }, { 23, 19 },
+    { 23, 20 }, { 23, 20 }, { 23, 20 }, { 23, 21 }, { 21, 25 }, { 23, 22 },
+    { 23, 22 }, { 24, 20 }, { 24, 20 }, { 24, 21 }, { 24, 21 }, { 24, 22 },
+    { 24, 22 }, { 24, 22 }, { 23, 25 }, { 25, 21 }, { 25, 21 }, { 25, 22 },
+    { 24, 24 }, { 25, 23 }, { 25, 23 }, { 25, 23 }, { 26, 22 }, { 26, 22 },
+    { 26, 22 }, { 26, 23 }, { 25, 25 }, { 26, 24 }, { 26, 24 }, { 24, 28 },
+    { 27, 23 }, { 27, 23 }, { 27, 23 }, { 27, 24 }, { 27, 24 }, { 27, 24 },
+    { 27, 25 }, { 25, 29 }, { 27, 26 }, { 27, 26 }, { 28, 24 }, { 28, 24 },
+    { 28, 25 }, { 28, 25 }, { 28, 26 }, { 28, 26 }, { 28, 26 }, { 27, 29 },
+    { 29, 25 }, { 29, 25 }, { 29, 26 }, { 28, 28 }, { 29, 27 }, { 29, 27 },
+    { 29, 27 }, { 30, 26 }, { 30, 26 }, { 30, 26 }, { 30, 27 }, { 29, 29 },
+    { 30, 28 }, { 30, 28 }, { 30, 28 }, { 31, 27 }, { 31, 27 }, { 31, 27 },
+    { 31, 28 }, { 31, 28 }, { 31, 28 }, { 31, 29 }, { 31, 29 }, { 31, 30 },
+    { 31, 30 }, { 31, 30 }, { 31, 31 }, { 31, 31 },
+};
+
+/* Single-color endpoint table for the 6-bit channel (green).
+ * Indexed by an 8-bit channel value; entry [v][0] goes into the "max"
+ * endpoint and entry [v][1] into the "min" endpoint.
+ * NOTE(review): presumably the pair whose 1/3 interpolation best
+ * approximates v, as in the public domain code this file is based on. */
+static const uint8_t match6[256][2] = {
+    {  0,  0 }, {  0,  1 }, {  1,  0 }, {  1,  0 }, {  1,  1 }, {  2,  0 },
+    {  2,  1 }, {  3,  0 }, {  3,  0 }, {  3,  1 }, {  4,  0 }, {  4,  0 },
+    {  4,  1 }, {  5,  0 }, {  5,  1 }, {  6,  0 }, {  6,  0 }, {  6,  1 },
+    {  7,  0 }, {  7,  0 }, {  7,  1 }, {  8,  0 }, {  8,  1 }, {  8,  1 },
+    {  8,  2 }, {  9,  1 }, {  9,  2 }, {  9,  2 }, {  9,  3 }, { 10,  2 },
+    { 10,  3 }, { 10,  3 }, { 10,  4 }, { 11,  3 }, { 11,  4 }, { 11,  4 },
+    { 11,  5 }, { 12,  4 }, { 12,  5 }, { 12,  5 }, { 12,  6 }, { 13,  5 },
+    { 13,  6 }, {  8, 16 }, { 13,  7 }, { 14,  6 }, { 14,  7 }, {  9, 17 },
+    { 14,  8 }, { 15,  7 }, { 15,  8 }, { 11, 16 }, { 15,  9 }, { 15, 10 },
+    { 16,  8 }, { 16,  9 }, { 16, 10 }, { 15, 13 }, { 17,  9 }, { 17, 10 },
+    { 17, 11 }, { 15, 16 }, { 18, 10 }, { 18, 11 }, { 18, 12 }, { 16, 16 },
+    { 19, 11 }, { 19, 12 }, { 19, 13 }, { 17, 17 }, { 20, 12 }, { 20, 13 },
+    { 20, 14 }, { 19, 16 }, { 21, 13 }, { 21, 14 }, { 21, 15 }, { 20, 17 },
+    { 22, 14 }, { 22, 15 }, { 25, 10 }, { 22, 16 }, { 23, 15 }, { 23, 16 },
+    { 26, 11 }, { 23, 17 }, { 24, 16 }, { 24, 17 }, { 27, 12 }, { 24, 18 },
+    { 25, 17 }, { 25, 18 }, { 28, 13 }, { 25, 19 }, { 26, 18 }, { 26, 19 },
+    { 29, 14 }, { 26, 20 }, { 27, 19 }, { 27, 20 }, { 30, 15 }, { 27, 21 },
+    { 28, 20 }, { 28, 21 }, { 28, 21 }, { 28, 22 }, { 29, 21 }, { 29, 22 },
+    { 24, 32 }, { 29, 23 }, { 30, 22 }, { 30, 23 }, { 25, 33 }, { 30, 24 },
+    { 31, 23 }, { 31, 24 }, { 27, 32 }, { 31, 25 }, { 31, 26 }, { 32, 24 },
+    { 32, 25 }, { 32, 26 }, { 31, 29 }, { 33, 25 }, { 33, 26 }, { 33, 27 },
+    { 31, 32 }, { 34, 26 }, { 34, 27 }, { 34, 28 }, { 32, 32 }, { 35, 27 },
+    { 35, 28 }, { 35, 29 }, { 33, 33 }, { 36, 28 }, { 36, 29 }, { 36, 30 },
+    { 35, 32 }, { 37, 29 }, { 37, 30 }, { 37, 31 }, { 36, 33 }, { 38, 30 },
+    { 38, 31 }, { 41, 26 }, { 38, 32 }, { 39, 31 }, { 39, 32 }, { 42, 27 },
+    { 39, 33 }, { 40, 32 }, { 40, 33 }, { 43, 28 }, { 40, 34 }, { 41, 33 },
+    { 41, 34 }, { 44, 29 }, { 41, 35 }, { 42, 34 }, { 42, 35 }, { 45, 30 },
+    { 42, 36 }, { 43, 35 }, { 43, 36 }, { 46, 31 }, { 43, 37 }, { 44, 36 },
+    { 44, 37 }, { 44, 37 }, { 44, 38 }, { 45, 37 }, { 45, 38 }, { 40, 48 },
+    { 45, 39 }, { 46, 38 }, { 46, 39 }, { 41, 49 }, { 46, 40 }, { 47, 39 },
+    { 47, 40 }, { 43, 48 }, { 47, 41 }, { 47, 42 }, { 48, 40 }, { 48, 41 },
+    { 48, 42 }, { 47, 45 }, { 49, 41 }, { 49, 42 }, { 49, 43 }, { 47, 48 },
+    { 50, 42 }, { 50, 43 }, { 50, 44 }, { 48, 48 }, { 51, 43 }, { 51, 44 },
+    { 51, 45 }, { 49, 49 }, { 52, 44 }, { 52, 45 }, { 52, 46 }, { 51, 48 },
+    { 53, 45 }, { 53, 46 }, { 53, 47 }, { 52, 49 }, { 54, 46 }, { 54, 47 },
+    { 57, 42 }, { 54, 48 }, { 55, 47 }, { 55, 48 }, { 58, 43 }, { 55, 49 },
+    { 56, 48 }, { 56, 49 }, { 59, 44 }, { 56, 50 }, { 57, 49 }, { 57, 50 },
+    { 60, 45 }, { 57, 51 }, { 58, 50 }, { 58, 51 }, { 61, 46 }, { 58, 52 },
+    { 59, 51 }, { 59, 52 }, { 62, 47 }, { 59, 53 }, { 60, 52 }, { 60, 53 },
+    { 60, 53 }, { 60, 54 }, { 61, 53 }, { 61, 54 }, { 61, 54 }, { 61, 55 },
+    { 62, 54 }, { 62, 55 }, { 62, 55 }, { 62, 56 }, { 63, 55 }, { 63, 56 },
+    { 63, 56 }, { 63, 57 }, { 63, 58 }, { 63, 59 }, { 63, 59 }, { 63, 60 },
+    { 63, 61 }, { 63, 62 }, { 63, 62 }, { 63, 63 },
+};
+
+/* Multiplication over 8 bit emulation: a * b scaled down by 255 using only
+ * adds and shifts (e.g. mul8(255, 31) == 31). Arguments and the whole
+ * expansion are parenthesized so the macro composes safely inside larger
+ * expressions; note that both arguments are evaluated twice. */
+#define mul8(a, b) ((((a) * (b)) + 128 + ((((a) * (b)) + 128) >> 8)) >> 8)
+
+/* Conversion from rgb24 to rgb565: red is scaled to 5 bits and placed in
+ * bits 15..11, green to 6 bits in bits 10..5, blue to 5 bits in bits 4..0.
+ * The expansion is fully parenthesized so it can be combined with other
+ * operators without precedence surprises. */
+#define rgb2rgb565(r, g, b) \
+    ((mul8((r), 31) << 11) | (mul8((g), 63) << 5) | (mul8((b), 31) << 0))
+
+/* Linear interpolation at 1/3 point between a and b, i.e. two thirds of a
+ * plus one third of b, using truncating integer division. Arguments and the
+ * whole expansion are parenthesized so compound arguments are safe. */
+#define lerp13(a, b) ((2 * (a) + (b)) / 3)
+
+/* Linear interpolation on an RGB pixel: each of the first three bytes of
+ * out is set to the 1/3 point between the matching bytes of p1 and p2.
+ * The inputs are only read, hence const; any fourth byte of out is left
+ * untouched. */
+static inline void lerp13rgb(uint8_t *out, const uint8_t *p1,
+                             const uint8_t *p2)
+{
+    int ch;
+
+    for (ch = 0; ch < 3; ch++)
+        out[ch] = lerp13(p1[ch], p2[ch]);
+}
+
+/* Conversion from rgb565 to rgb24: split v into its 5/6/5-bit fields and
+ * widen each one to 8 bits through the expansion tables. The fourth output
+ * byte is cleared. */
+static inline void rgb5652rgb(uint8_t *out, uint16_t v)
+{
+    const unsigned red5   = (v >> 11) & 0x1f;
+    const unsigned green6 = (v >>  5) & 0x3f;
+    const unsigned blue5  =  v        & 0x1f;
+
+    out[3] = 0;
+    out[2] = expand5[blue5];
+    out[1] = expand6[green6];
+    out[0] = expand5[red5];
+}
+
+/* Color matching function.
+ *
+ * Builds the four-entry palette implied by the rgb565 endpoints c0 and c1
+ * (the endpoints themselves plus the two 1/3 interpolations) and assigns
+ * one palette index to each of the 16 pixels of the block.
+ *
+ * block  points to the first RGBA pixel of the 4x4 block.
+ * stride is the distance in bytes between two rows of the block.
+ * Returns the 32-bit index mask, two bits per pixel, with the first pixel
+ * in the lowest two bits. */
+static unsigned int match_colors(const uint8_t *block, ptrdiff_t stride,
+                                 uint16_t c0, uint16_t c1)
+{
+    uint32_t mask = 0;
+    int dirr, dirg, dirb;
+    int dots[16];
+    int stops[4];
+    int x, y, k = 0;
+    int c0_point, half_point, c3_point;
+    uint8_t color[16];
+    /* Palette index for each comparison outcome, already positioned in the
+     * two top bits of the mask. The constants are unsigned: shifting 2 or 3
+     * left by 30 as a signed int would overflow. */
+    static const uint32_t index_map[8] = {
+        0U << 30, 2U << 30, 0U << 30, 2U << 30,
+        3U << 30, 3U << 30, 1U << 30, 1U << 30,
+    };
+
+    /* Fill color and compute direction for each component */
+    rgb5652rgb(color + 0, c0);
+    rgb5652rgb(color + 4, c1);
+    lerp13rgb(color + 8, color + 0, color + 4);
+    lerp13rgb(color + 12, color + 4, color + 0);
+
+    dirr = color[0 * 4 + 0] - color[1 * 4 + 0];
+    dirg = color[0 * 4 + 1] - color[1 * 4 + 1];
+    dirb = color[0 * 4 + 2] - color[1 * 4 + 2];
+
+    /* Project every pixel, and every palette entry, onto the c0 - c1 axis.
+     * The outer counter doubles as the palette entry index. */
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++)
+            dots[k++] = block[0 + x * 4 + y * stride] * dirr +
+                        block[1 + x * 4 + y * stride] * dirg +
+                        block[2 + x * 4 + y * stride] * dirb;
+
+        stops[y] = color[0 + y * 4] * dirr +
+                   color[1 + y * 4] * dirg +
+                   color[2 + y * 4] * dirb;
+    }
+
+    /* Think of the colors as arranged on a line; project point onto that line,
+     * then choose next color out of available ones. we compute the crossover
+     * points for 'best color in top half'/'best in bottom half' and then
+     * the same inside that subinterval.
+     *
+     * Relying on this 1d approximation isn't always optimal in terms of
+     * euclidean distance, but it's very close and a lot faster.
+     *
+     * http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html */
+    c0_point   = (stops[1] + stops[3]) >> 1;
+    half_point = (stops[3] + stops[2]) >> 1;
+    c3_point   = (stops[2] + stops[0]) >> 1;
+
+    for (x = 0; x < 16; x++) {
+        int dot  = dots[x];
+        int bits = (dot < half_point ? 4 : 0) |
+                   (dot < c0_point   ? 2 : 0) |
+                   (dot < c3_point   ? 1 : 0);
+
+        /* New indices enter at the top and slide down two bits per pixel */
+        mask >>= 2;
+        mask  |= index_map[bits];
+    }
+
+    return mask;
+}
+
+/* Color optimization function.
+ *
+ * Chooses the two endpoint colors of a block: the covariance of the pixel
+ * colors is computed, its principal axis is estimated with a few power
+ * iterations, and the pixels with the largest and smallest projection on
+ * that axis are converted to rgb565.
+ *
+ * block  points to the first RGBA pixel of the 4x4 block.
+ * stride is the distance in bytes between two rows of the block.
+ * pmax16 receives the rgb565 color of the pixel with the largest projection.
+ * pmin16 receives the rgb565 color of the pixel with the smallest one. */
+static void optimize_colors(const uint8_t *block, ptrdiff_t stride,
+                            uint16_t *pmax16, uint16_t *pmin16)
+{
+    const uint8_t *minp;
+    const uint8_t *maxp;
+    const int iter_power = 4;
+    double magn;
+    int v_r, v_g, v_b;
+    float covf[6], vfr, vfg, vfb;
+    int mind, maxd;
+    int cov[6] = { 0 };
+    int mu[3], min[3], max[3];
+    int ch, iter, x, y;
+
+    /* Determine color distribution: per-channel mean, minimum and maximum
+     * over all 16 pixels of the block. */
+    for (ch = 0; ch < 3; ch++) {
+        const uint8_t *bp = &block[ch];
+        int muv  = 0;
+        int minv = bp[0];
+        int maxv = bp[0];
+
+        for (y = 0; y < 4; y++) {
+            for (x = 0; x < 4; x++) {
+                int val = bp[x * 4 + y * stride];
+
+                muv += val;
+                if (val < minv)
+                    minv = val;
+                else if (val > maxv)
+                    maxv = val;
+            }
+        }
+
+        /* Rounded mean of the 16 samples */
+        mu[ch]  = (muv + 8) >> 4;
+        min[ch] = minv;
+        max[ch] = maxv;
+    }
+
+    /* Determine covariance matrix */
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            int r = block[x * 4 + stride * y + 0] - mu[0];
+            int g = block[x * 4 + stride * y + 1] - mu[1];
+            int b = block[x * 4 + stride * y + 2] - mu[2];
+
+            cov[0] += r * r;
+            cov[1] += r * g;
+            cov[2] += r * b;
+            cov[3] += g * g;
+            cov[4] += g * b;
+            cov[5] += b * b;
+        }
+    }
+
+    /* Convert covariance matrix to float,
+     * find principal axis via power iter */
+    for (x = 0; x < 6; x++)
+        covf[x] = cov[x] / 255.0f;
+
+    /* Start the iteration from the bounding box diagonal */
+    vfr = (float) (max[0] - min[0]);
+    vfg = (float) (max[1] - min[1]);
+    vfb = (float) (max[2] - min[2]);
+
+    for (iter = 0; iter < iter_power; iter++) {
+        float r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2];
+        float g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4];
+        float b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5];
+
+        vfr = r;
+        vfg = g;
+        vfb = b;
+    }
+
+    magn = fabs(vfr);
+    if (fabs(vfg) > magn)
+        magn = fabs(vfg);
+    if (fabs(vfb) > magn)
+        magn = fabs(vfb);
+
+    /* if magnitudo is too small, default to luminance */
+    if (magn < 4.0f) {
+        /* JPEG YCbCr luma coefs, scaled by 1000 */
+        v_r = 299;
+        v_g = 587;
+        v_b = 114;
+    } else {
+        /* Normalize so that the largest component becomes +-512 */
+        magn = 512.0 / magn;
+        v_r  = (int) (vfr * magn);
+        v_g  = (int) (vfg * magn);
+        v_b  = (int) (vfb * magn);
+    }
+
+    /* Pick colors at extreme points */
+    mind = maxd = block[0] * v_r + block[1] * v_g + block[2] * v_b;
+    minp = maxp = block;
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            int dot = block[x * 4 + y * stride + 0] * v_r +
+                      block[x * 4 + y * stride + 1] * v_g +
+                      block[x * 4 + y * stride + 2] * v_b;
+
+            if (dot < mind) {
+                mind = dot;
+                minp = block + x * 4 + y * stride;
+            } else if (dot > maxd) {
+                maxd = dot;
+                maxp = block + x * 4 + y * stride;
+            }
+        }
+    }
+
+    *pmax16 = rgb2rgb565(maxp[0], maxp[1], maxp[2]);
+    *pmin16 = rgb2rgb565(minp[0], minp[1], minp[2]);
+}
+
+/* Try to optimize colors to suit block contents better, by solving
+ * a least squares system via normal equations + Cramer's rule.
+ *
+ * block  points to the first RGBA pixel of the 4x4 block.
+ * stride is the distance in bytes between two rows of the block.
+ * pmax16 and pmin16 hold the current rgb565 endpoints on entry and the
+ * refined ones on return.
+ * mask   is the index mask currently assigned to the block, two bits per
+ * pixel with the first pixel in the lowest bits.
+ * Returns non-zero when at least one endpoint was changed. */
+static int refine_colors(const uint8_t *block, ptrdiff_t stride,
+                         uint16_t *pmax16, uint16_t *pmin16, uint32_t mask)
+{
+    uint32_t cm = mask;
+    uint16_t oldMin = *pmin16;
+    uint16_t oldMax = *pmax16;
+    uint16_t min16, max16;
+    int x, y;
+
+    /* Additional magic to save a lot of multiplies in the accumulating loop.
+     * The tables contain precomputed products of weights for least squares
+     * system, accumulated inside one 32-bit register.
+     * w1tab is the weight (in thirds) of the max endpoint for each index;
+     * prods packs w1 * w1, w2 * w2 and w1 * w2 (with w2 = 3 - w1) into
+     * bits 16 and up, bits 15..8 and bits 7..0 respectively. */
+    const int w1tab[4] = { 3, 0, 2, 1 };
+    const int prods[4] = { 0x090000, 0x000900, 0x040102, 0x010402 };
+
+    /* Check if all pixels have the same index: in that case every pair of
+     * neighbouring indices cancels in the xor and only the lowest index
+     * survives. The parentheses matter, '<' binds tighter than '^'. */
+    if ((mask ^ (mask << 2)) < 4) {
+        /* If so, linear system would be singular; solve using optimal
+         * single-color match on average color. */
+        int r = 8, g = 8, b = 8;
+        for (y = 0; y < 4; y++) {
+            for (x = 0; x < 4; x++) {
+                r += block[0 + x * 4 + y * stride];
+                g += block[1 + x * 4 + y * stride];
+                b += block[2 + x * 4 + y * stride];
+            }
+        }
+
+        r >>= 4;
+        g >>= 4;
+        b >>= 4;
+
+        max16 = (match5[r][0] << 11) | (match6[g][0] << 5) | match5[b][0];
+        min16 = (match5[r][1] << 11) | (match6[g][1] << 5) | match5[b][1];
+    } else {
+        float fr, fg, fb;
+        int at1_r = 0, at1_g = 0, at1_b = 0;
+        int at2_r = 0, at2_g = 0, at2_b = 0;
+        int akku = 0;
+        int xx, xy, yy;
+
+        for (y = 0; y < 4; y++) {
+            for (x = 0; x < 4; x++) {
+                int step = cm & 3;
+                int w1 = w1tab[step];
+                int r = block[0 + x * 4 + y * stride];
+                int g = block[1 + x * 4 + y * stride];
+                int b = block[2 + x * 4 + y * stride];
+
+                akku  += prods[step];
+                at1_r += w1 * r;
+                at1_g += w1 * g;
+                at1_b += w1 * b;
+                at2_r += r;
+                at2_g += g;
+                at2_b += b;
+
+                cm >>= 2;
+            }
+        }
+
+        /* Turn the plain sums into sums weighted by w2 = 3 - w1 */
+        at2_r = 3 * at2_r - at1_r;
+        at2_g = 3 * at2_g - at1_g;
+        at2_b = 3 * at2_b - at1_b;
+
+        /* Extract solutions and decide solvability */
+        xx =  akku >> 16;
+        yy = (akku >>  8) & 0xFF;
+        xy = (akku >>  0) & 0xFF;
+
+        fr = 3.0f * 31.0f / 255.0f / (xx * yy - xy * xy);
+        fg = fr * 63.0f / 31.0f;
+        fb = fr;
+
+        /* Solve */
+        max16  = av_clip_uintp2((at1_r * yy - at2_r * xy) * fr + 0.5f, 5) << 11;
+        max16 |= av_clip_uintp2((at1_g * yy - at2_g * xy) * fg + 0.5f, 6) <<  5;
+        max16 |= av_clip_uintp2((at1_b * yy - at2_b * xy) * fb + 0.5f, 5) <<  0;
+
+        min16  = av_clip_uintp2((at2_r * xx - at1_r * xy) * fr + 0.5f, 5) << 11;
+        min16 |= av_clip_uintp2((at2_g * xx - at1_g * xy) * fg + 0.5f, 6) <<  5;
+        min16 |= av_clip_uintp2((at2_b * xx - at1_b * xy) * fb + 0.5f, 5) <<  0;
+    }
+
+    *pmin16 = min16;
+    *pmax16 = max16;
+    return oldMin != min16 || oldMax != max16;
+}
+
+/* Check if input block is a constant color: returns 1 when each of the 16
+ * pixels holds the same 32-bit value as the first one, 0 otherwise. */
+static int constant_color(const uint8_t *block, ptrdiff_t stride)
+{
+    const uint32_t reference = AV_RL32(block);
+    int i;
+
+    /* Walk the pixels row by row: column is i & 3, row is i >> 2 */
+    for (i = 0; i < 16; i++) {
+        const uint8_t *pixel = block + (i & 3) * 4 + (i >> 2) * stride;
+
+        if (AV_RL32(pixel) != reference)
+            return 0;
+    }
+
+    return 1;
+}
+
+/* Main color compression function: writes the 8-byte color part of a block
+ * to dst, as two little-endian rgb565 endpoints followed by a 32-bit
+ * little-endian index mask.
+ *
+ * stride is the distance in bytes between two rows of the input block. */
+static void compress_color(uint8_t *dst, ptrdiff_t stride,
+                           const uint8_t *block)
+{
+    uint32_t indices;
+    uint16_t hi, lo;
+
+    if (constant_color(block, stride)) {
+        /* Constant color will load values from tables */
+        const uint8_t *r5 = match5[block[0]];
+        const uint8_t *g6 = match6[block[1]];
+        const uint8_t *b5 = match5[block[2]];
+
+        hi      = (r5[0] << 11) | (g6[0] << 5) | b5[0];
+        lo      = (r5[1] << 11) | (g6[1] << 5) | b5[1];
+        indices = 0xAAAAAAAA;
+    } else {
+        /* Otherwise find pca and map along principal axis */
+        optimize_colors(block, stride, &hi, &lo);
+        indices = 0;
+        if (hi != lo)
+            indices = match_colors(block, stride, hi, lo);
+
+        /* One pass refinement; indices are recomputed only when the
+         * endpoints actually moved */
+        if (refine_colors(block, stride, &hi, &lo, indices)) {
+            indices = 0;
+            if (hi != lo)
+                indices = match_colors(block, stride, hi, lo);
+        }
+    }
+
+    /* The larger endpoint goes first; exchanging the endpoints flips the
+     * low bit of every index */
+    if (hi < lo) {
+        uint16_t tmp = hi;
+
+        hi       = lo;
+        lo       = tmp;
+        indices ^= 0x55555555;
+    }
+
+    /* Finally write the color block */
+    AV_WL16(dst + 0, hi);
+    AV_WL16(dst + 2, lo);
+    AV_WL32(dst + 4, indices);
+}
+
+/* Alpha compression function: encodes the fourth byte of every pixel of the
+ * block into 8 bytes at dst, i.e. the maximum value, the minimum value and
+ * sixteen 3-bit indices packed starting from the lowest bit.
+ *
+ * stride is the distance in bytes between two rows of the input block. */
+static void compress_alpha(uint8_t *dst, ptrdiff_t stride,
+                           const uint8_t *block)
+{
+    int lo, hi;
+    int range, range2, range4, offset;
+    int pending = 0;
+    int nbits   = 0;
+    int i;
+
+    memset(dst, 0, 8);
+
+    /* Find min/max color */
+    lo = hi = block[3];
+    for (i = 0; i < 16; i++) {
+        int a = block[3 + (i & 3) * 4 + (i >> 2) * stride];
+
+        if (a < lo)
+            lo = a;
+        else if (a > hi)
+            hi = a;
+    }
+
+    /* Encode them */
+    dst[0] = (uint8_t) hi;
+    dst[1] = (uint8_t) lo;
+
+    /* Mono-alpha shortcut: the indices stay zero */
+    if (lo == hi)
+        return;
+
+    dst += 2;
+
+    /* Determine bias and emit color indices.
+     * Given the choice of mx/mn, these indices are optimal:
+     * fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination */
+    range  = hi - lo;
+    range2 = range * 2;
+    range4 = range * 4;
+    offset = (range < 8 ? range - 1 : range / 2 + 2) - lo * 7;
+
+    for (i = 0; i < 16; i++) {
+        int scaled = block[3 + (i & 3) * 4 + (i >> 2) * stride] * 7 + offset;
+        int level  = 0;
+
+        /* This is a "linear scale" lerp factor between 0 (val=min)
+         * and 7 (val=max) to select index, found one bit at a time. */
+        if (scaled >= range4) {
+            level   = 4;
+            scaled -= range4;
+        }
+        if (scaled >= range2) {
+            level  += 2;
+            scaled -= range2;
+        }
+        if (scaled >= range)
+            level += 1;
+
+        /* Turn linear scale into DXT index (0/1 are extreme points) */
+        level  = -level & 7;
+        level ^= (2 > level);
+
+        /* Write index, flushing every completed byte */
+        pending |= level << nbits;
+        nbits   += 3;
+        if (nbits >= 8) {
+            *dst++    = pending;
+            pending >>= 8;
+            nbits    -= 8;
+        }
+    }
+}
+
+/**
+ * Convert one RGBA pixel to unscaled YCoCg.
+ * Scale is usually introduced to avoid banding over a certain range of colors,
+ * but this version of the algorithm does not introduce it as much as other
+ * implementations, allowing for a simpler and faster conversion.
+ *
+ * The output layout is Co, Cg, 0, Y: luma takes the place of the fourth
+ * component and the input alpha is not read.
+ */
+static void rgba2ycocg(uint8_t *dst, const uint8_t *pixel)
+{
+    const int red        = pixel[0];
+    const int blue       = pixel[2];
+    const int half_green = (pixel[1] + 1) >> 1;
+    const int quarter_rb = (2 + red + blue) >> 2;
+
+    dst[0] = av_clip_uint8(128 + ((red - blue + 1) >> 1)); /* Co */
+    dst[1] = av_clip_uint8(128 + half_green - quarter_rb); /* Cg */
+    dst[2] = 0;
+    dst[3] = av_clip_uint8(half_green + quarter_rb);       /* Y  */
+}
+
+/**
+ * Compress one block of RGBA pixels in a DXT1 texture and store the
+ * resulting bytes in 'dst'. Alpha is not preserved: only the color part
+ * of the block is encoded.
+ *
+ * @param dst    output buffer; 8 bytes are written.
+ * @param stride scanline in bytes, i.e. the distance between two rows of
+ *               the input block.
+ * @param block  block to compress (first pixel of the 4x4 block).
+ * @return how much texture data has been written (always 8).
+ */
+static int dxt1_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    compress_color(dst, stride, block);
+
+    return 8;
+}
+
+/**
+ * Compress one block of RGBA pixels in a DXT5 texture and store the
+ * resulting bytes in 'dst'. Alpha is preserved: the alpha part occupies
+ * the first 8 bytes and the color part the following 8.
+ *
+ * @param dst    output buffer; 16 bytes are written.
+ * @param stride scanline in bytes, i.e. the distance between two rows of
+ *               the input block.
+ * @param block  block to compress (first pixel of the 4x4 block).
+ * @return how much texture data has been written (always 16).
+ */
+static int dxt5_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    /* Alpha part first, color part right after it */
+    compress_alpha(dst, stride, block);
+    compress_color(dst + 8, stride, block);
+
+    return 16;
+}
+
+/**
+ * Compress one block of RGBA pixels in a DXT5-YCoCg texture and store the
+ * resulting bytes in 'dst'. Alpha is not preserved, its slot carries luma.
+ *
+ * @param dst    output buffer; 16 bytes are written.
+ * @param stride scanline in bytes.
+ * @param block  block to compress.
+ * @return how much texture data has been written.
+ */
+static int dxt5ys_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
+{
+    uint8_t ycocg[4 * 4 * 4];
+    int i;
+
+    /* Reorder the components into a tightly packed scratch block (16 bytes
+     * per row) and then run a normal DXT5 compression on it. */
+    for (i = 0; i < 16; i++) {
+        const int col = i & 3;
+        const int row = i >> 2;
+
+        rgba2ycocg(ycocg + i * 4, block + col * 4 + row * stride);
+    }
+
+    compress_alpha(dst, 16, ycocg);
+    compress_color(dst + 8, 16, ycocg);
+
+    return 16;
+}
+
+/* Install the block compression routines of this file into the context.
+ * Only the dxt1_block, dxt5_block and dxt5ys_block entries are assigned;
+ * every other entry of c is left as it was. */
+av_cold void ff_texturedspenc_init(TextureDSPContext *c)
+{
+    c->dxt1_block   = dxt1_block;
+    c->dxt5_block   = dxt5_block;
+    c->dxt5ys_block = dxt5ys_block;
+}
-- 
1.9.5 (Apple Git-50.3)


_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH 2/5] Introduce a TextureDSP module

Reply via email to