jpeg pushed a commit to branch master.

http://git.enlightenment.org/core/efl.git/commit/?id=9550b653480f7a7a3af02c7e7712dc9e081605f4

commit 9550b653480f7a7a3af02c7e7712dc9e081605f4
Author: Matvey Konovalov <m.konova...@samsung.com>
Date:   Wed Apr 30 14:08:55 2014 +0900

    Evas rg_etc1: Unroll the decoding loop for performance
    
    Measurements have shown a 40% perf increase with these changes.
    Patch by Matvey Konovalov.
    
    Signed-off-by: Jean-Philippe Andre <jp.an...@samsung.com>
---
 src/static_libs/rg_etc/rg_etc1.c | 220 +++++++++++++++++++++++++++------------
 1 file changed, 152 insertions(+), 68 deletions(-)

diff --git a/src/static_libs/rg_etc/rg_etc1.c b/src/static_libs/rg_etc/rg_etc1.c
index 2df3202..f768eaa 100644
--- a/src/static_libs/rg_etc/rg_etc1.c
+++ b/src/static_libs/rg_etc/rg_etc1.c
@@ -151,6 +151,8 @@ enum RG_Etc_Constants
 #define B_VAL_GET(p) (((DATA8 *)(p))[1])
 #endif
 
+#define A_MASK (0xFFul << 24)
+
 // For unpacking and writing BGRA output data
 #define ARGB_JOIN(a,r,g,b) \
         (((a) << 24) + ((r) << 16) + ((g) << 8) + (b))
@@ -529,21 +531,9 @@ rg_etc1_color_quad_init(unsigned char r, unsigned char g, unsigned char b, unsig
 }
 
 static inline unsigned int
-rg_etc1_color_quad_set(unsigned int old_color, unsigned int new_color, unsigned char preserve_alpha)
+rg_etc1_color_quad_set(unsigned int old_color, unsigned int new_color)
 {
-   if (preserve_alpha)
-     {
-        unsigned char r, g, b, a;
-
-        // Used for UNPACKING
-        a = A_VAL_SET(&old_color);
-        r = R_VAL_SET(&new_color);
-        g = G_VAL_SET(&new_color);
-        b = B_VAL_SET(&new_color);
-
-        return ARGB_JOIN(a, r, g, b);
-     }
-   return new_color;
+   return (new_color & ~A_MASK) | (old_color & A_MASK);
 }
 
 static inline void
@@ -876,15 +866,15 @@ rg_etc_block_base4_color_get(const unsigned char bytes[8], unsigned char idx)
 
    if (idx)
      {
-        r = rg_etc1_block_byte_bits_get(bytes, cETC1AbsColor4R2BitOffset, 4);
-        g = rg_etc1_block_byte_bits_get(bytes, cETC1AbsColor4G2BitOffset, 4);
-        b = rg_etc1_block_byte_bits_get(bytes, cETC1AbsColor4B2BitOffset, 4);
+        r = (bytes[0]) & ((1 << 4) - 1);
+        g = (bytes[1]) & ((1 << 4) - 1);
+        b = (bytes[2]) & ((1 << 4) - 1);
      }
    else
      {
-        r = rg_etc1_block_byte_bits_get(bytes, cETC1AbsColor4R1BitOffset, 4);
-        g = rg_etc1_block_byte_bits_get(bytes, cETC1AbsColor4G1BitOffset, 4);
-        b = rg_etc1_block_byte_bits_get(bytes, cETC1AbsColor4B1BitOffset, 4);
+        r = (bytes[0] >> 4) & ((1 << 4) - 1);
+        g = (bytes[1] >> 4) & ((1 << 4) - 1);
+        b = (bytes[2] >> 4) & ((1 << 4) - 1);
      }
 
    return b | (g << 4) | (r << 8);
@@ -912,9 +902,9 @@ rg_etc1_block_base5_color_get(const unsigned char bytes[8])
 {
    unsigned short r, g, b;
 
-   r = rg_etc1_block_byte_bits_get(bytes, cETC1BaseColor5RBitOffset, 5);
-   g = rg_etc1_block_byte_bits_get(bytes, cETC1BaseColor5GBitOffset, 5);
-   b = rg_etc1_block_byte_bits_get(bytes, cETC1BaseColor5BBitOffset, 5);
+   r = (bytes[0] >> 3) & ((1 << 5) - 1);
+   g = (bytes[1] >> 3) & ((1 << 5) - 1);
+   b = (bytes[2] >> 3) & ((1 << 5) - 1);
 
    return b | (g << 5) | (r << 10);
 }
@@ -932,9 +922,9 @@ rg_etc1_block_delta3_color_get(const unsigned char bytes[8])
 {
    unsigned short r, g, b;
 
-   r = rg_etc1_block_byte_bits_get(bytes, cETC1DeltaColor3RBitOffset, 3);
-   g = rg_etc1_block_byte_bits_get(bytes, cETC1DeltaColor3GBitOffset, 3);
-   b = rg_etc1_block_byte_bits_get(bytes, cETC1DeltaColor3BBitOffset, 3);
+   r = (bytes[0]) & ((1 << 3) - 1);
+   g = (bytes[1]) & ((1 << 3) - 1);
+   b = (bytes[2]) & ((1 << 3) - 1);
 
    return b | (g << 3) | (r << 6);
 }
@@ -1207,13 +1197,14 @@ rg_etc1_unpack_block(const void *ETC1_block, unsigned int *pDst_pixels_BGRA, boo
    unsigned char diff_flag, flip_flag, table_index0, table_index1;
    unsigned int subblock_colors0[4] = { 0 };
    unsigned int subblock_colors1[4] = { 0 };
-   unsigned char x, y;
    unsigned char success = 1;
+   const unsigned char *bytes;
+   bytes = (unsigned char *)ETC1_block;
 
    diff_flag = rg_etc1_block_diff_bit_get(ETC1_block);
    flip_flag = rg_etc1_block_flip_bit_get(ETC1_block);
-   table_index0 = rg_etc1_block_inten_table_get(ETC1_block, 0);
-   table_index1 = rg_etc1_block_inten_table_get(ETC1_block, 1);
+   table_index0 = (bytes[3] >> 5) & 7;
+   table_index1 = (bytes[3] >> 2) & 7;
 
    if (diff_flag)
      {
@@ -1249,44 +1240,137 @@ rg_etc1_unpack_block(const void *ETC1_block, unsigned int *pDst_pixels_BGRA, boo
    //  0011
    //  0011
    //  0011
-   // Depending on flip_flag.
-
-   if (flip_flag)
-     {
-        for (y = 0; y < 2; y++)
-          {
-             for (x = 0; x < 4; x++)
-               pDst_pixels_BGRA[x] = rg_etc1_color_quad_set(pDst_pixels_BGRA[x],
-                                                  subblock_colors0[rg_etc1_block_selector_get(ETC1_block, x, y)],
-                                                  preserve_alpha);
-             pDst_pixels_BGRA += 4;
-          }
-
-        for (y = 2; y < 4; y++)
-          {
-             for (x = 0; x < 4; x++)
-               pDst_pixels_BGRA[x] = rg_etc1_color_quad_set(pDst_pixels_BGRA[x],
-                                                  subblock_colors1[rg_etc1_block_selector_get(ETC1_block, x, y)],
-                                                  preserve_alpha);
-             pDst_pixels_BGRA += 4;
-          }
-     }
-   else
-     {
-        for (y = 0; y < 4; y++)
-          {
-             for (x = 0; x < 2; x++)
-               pDst_pixels_BGRA[x] = rg_etc1_color_quad_set(pDst_pixels_BGRA[x],
-                                                  subblock_colors0[rg_etc1_block_selector_get(ETC1_block, x, y)],
-                                                  preserve_alpha);
-             for (; x < 4; x++)
-               pDst_pixels_BGRA[x] = rg_etc1_color_quad_set(pDst_pixels_BGRA[x],
-                                                  subblock_colors1[rg_etc1_block_selector_get(ETC1_block, x, y)],
-                                                  preserve_alpha);
-
-             pDst_pixels_BGRA += 4;
-          }
-     }
+   unsigned char val0 = (bytes[7] & 1) | ((bytes[5] & 1) << 1);
+   unsigned char val1 = ((bytes[7] >> 4) & 1) | (((bytes[5] >> 4) & 1) << 1);
+   unsigned char val2 = (bytes[6] & 1) | ((bytes[4] & 1) << 1);
+   unsigned char val3 = ((bytes[6] >> 4) & 1) | (((bytes[4] >> 4) & 1) << 1);
+   unsigned char val4 = ((bytes[7] >> 1) & 1) | (((bytes[5] >> 1) & 1) << 1);
+   unsigned char val5 = ((bytes[7] >> 5) & 1) | (((bytes[5] >> 5) & 1) << 1);
+   unsigned char val6 = ((bytes[6] >> 1) & 1) | (((bytes[4] >> 1) & 1) << 1);
+   unsigned char val7 = ((bytes[6] >> 5) & 1) | (((bytes[4] >> 5) & 1) << 1);
+   unsigned char val8 = ((bytes[7] >> 2) & 1) | (((bytes[5] >> 2) & 1) << 1);
+   unsigned char val9 = ((bytes[7] >> 6) & 1) | (((bytes[5] >> 6) & 1) << 1);
+   unsigned char val10 = ((bytes[6] >> 2) & 1) | (((bytes[4] >> 2) & 1) << 1);
+   unsigned char val11 = ((bytes[6] >> 6) & 1) | (((bytes[4] >> 6) & 1) << 1);
+   unsigned char val12 = ((bytes[7] >> 3) & 1) | (((bytes[5] >> 3) & 1) << 1);
+   unsigned char val13 = ((bytes[7] >> 7) & 1) | (((bytes[5] >> 7) & 1) << 1);
+   unsigned char val14 = ((bytes[6] >> 3) & 1) | (((bytes[4] >> 3) & 1) << 1);
+   unsigned char val15 = ((bytes[6] >> 7) & 1) | (((bytes[4] >> 7) & 1) << 1);
+
+   if (preserve_alpha)   // Depending on flip_flag.
+      {
+       if (flip_flag)
+         {
+            pDst_pixels_BGRA[0] = rg_etc1_color_quad_set(pDst_pixels_BGRA[0],
+                                               subblock_colors0[rg_etc1_to_selector_index[val0]]);
+            pDst_pixels_BGRA[1] = rg_etc1_color_quad_set(pDst_pixels_BGRA[1],
+                                               subblock_colors0[rg_etc1_to_selector_index[val1]]);
+            pDst_pixels_BGRA[2] = rg_etc1_color_quad_set(pDst_pixels_BGRA[2],
+                                               subblock_colors0[rg_etc1_to_selector_index[val2]]);
+            pDst_pixels_BGRA[3] = rg_etc1_color_quad_set(pDst_pixels_BGRA[3],
+                                               subblock_colors0[rg_etc1_to_selector_index[val3]]);
+            pDst_pixels_BGRA[4] = rg_etc1_color_quad_set(pDst_pixels_BGRA[4],
+                                               subblock_colors0[rg_etc1_to_selector_index[val4]]);
+            pDst_pixels_BGRA[5] = rg_etc1_color_quad_set(pDst_pixels_BGRA[5],
+                                               subblock_colors0[rg_etc1_to_selector_index[val5]]);
+            pDst_pixels_BGRA[6] = rg_etc1_color_quad_set(pDst_pixels_BGRA[6],
+                                               subblock_colors0[rg_etc1_to_selector_index[val6]]);
+            pDst_pixels_BGRA[7] = rg_etc1_color_quad_set(pDst_pixels_BGRA[7],
+                                               subblock_colors0[rg_etc1_to_selector_index[val7]]);
+            pDst_pixels_BGRA[8] = rg_etc1_color_quad_set(pDst_pixels_BGRA[8],
+                                               subblock_colors1[rg_etc1_to_selector_index[val8]]);
+            pDst_pixels_BGRA[9] = rg_etc1_color_quad_set(pDst_pixels_BGRA[9],
+                                               subblock_colors1[rg_etc1_to_selector_index[val9]]);
+            pDst_pixels_BGRA[10] = rg_etc1_color_quad_set(pDst_pixels_BGRA[10],
+                                               subblock_colors1[rg_etc1_to_selector_index[val10]]);
+            pDst_pixels_BGRA[11] = rg_etc1_color_quad_set(pDst_pixels_BGRA[11],
+                                               subblock_colors1[rg_etc1_to_selector_index[val11]]);
+            pDst_pixels_BGRA[12] = rg_etc1_color_quad_set(pDst_pixels_BGRA[12],
+                                               subblock_colors1[rg_etc1_to_selector_index[val12]]);
+            pDst_pixels_BGRA[13] = rg_etc1_color_quad_set(pDst_pixels_BGRA[13],
+                                               subblock_colors1[rg_etc1_to_selector_index[val13]]);
+            pDst_pixels_BGRA[14] = rg_etc1_color_quad_set(pDst_pixels_BGRA[14],
+                                               subblock_colors1[rg_etc1_to_selector_index[val14]]);
+            pDst_pixels_BGRA[15] = rg_etc1_color_quad_set(pDst_pixels_BGRA[15],
+                                               subblock_colors1[rg_etc1_to_selector_index[val15]]);
+         }
+       else
+         {
+            pDst_pixels_BGRA[0] = rg_etc1_color_quad_set(pDst_pixels_BGRA[0],
+                                               subblock_colors0[rg_etc1_to_selector_index[val0]]);
+            pDst_pixels_BGRA[1] = rg_etc1_color_quad_set(pDst_pixels_BGRA[1],
+                                               subblock_colors0[rg_etc1_to_selector_index[val1]]);
+            pDst_pixels_BGRA[2] = rg_etc1_color_quad_set(pDst_pixels_BGRA[2],
+                                               subblock_colors1[rg_etc1_to_selector_index[val2]]);
+            pDst_pixels_BGRA[3] = rg_etc1_color_quad_set(pDst_pixels_BGRA[3],
+                                               subblock_colors1[rg_etc1_to_selector_index[val3]]);
+            pDst_pixels_BGRA[4] = rg_etc1_color_quad_set(pDst_pixels_BGRA[4],
+                                               subblock_colors0[rg_etc1_to_selector_index[val4]]);
+            pDst_pixels_BGRA[5] = rg_etc1_color_quad_set(pDst_pixels_BGRA[5],
+                                               subblock_colors0[rg_etc1_to_selector_index[val5]]);
+            pDst_pixels_BGRA[6] = rg_etc1_color_quad_set(pDst_pixels_BGRA[6],
+                                               subblock_colors1[rg_etc1_to_selector_index[val6]]);
+            pDst_pixels_BGRA[7] = rg_etc1_color_quad_set(pDst_pixels_BGRA[7],
+                                               subblock_colors1[rg_etc1_to_selector_index[val7]]);
+            pDst_pixels_BGRA[8] = rg_etc1_color_quad_set(pDst_pixels_BGRA[8],
+                                               subblock_colors0[rg_etc1_to_selector_index[val8]]);
+            pDst_pixels_BGRA[9] = rg_etc1_color_quad_set(pDst_pixels_BGRA[9],
+                                               subblock_colors0[rg_etc1_to_selector_index[val9]]);
+            pDst_pixels_BGRA[10] = rg_etc1_color_quad_set(pDst_pixels_BGRA[10],
+                                               subblock_colors1[rg_etc1_to_selector_index[val10]]);
+            pDst_pixels_BGRA[11] = rg_etc1_color_quad_set(pDst_pixels_BGRA[11],
+                                               subblock_colors1[rg_etc1_to_selector_index[val11]]);
+            pDst_pixels_BGRA[12] = rg_etc1_color_quad_set(pDst_pixels_BGRA[12],
+                                               subblock_colors0[rg_etc1_to_selector_index[val12]]);
+            pDst_pixels_BGRA[13] = rg_etc1_color_quad_set(pDst_pixels_BGRA[13],
+                                               subblock_colors0[rg_etc1_to_selector_index[val13]]);
+            pDst_pixels_BGRA[14] = rg_etc1_color_quad_set(pDst_pixels_BGRA[14],
+                                               subblock_colors1[rg_etc1_to_selector_index[val14]]);
+            pDst_pixels_BGRA[15] = rg_etc1_color_quad_set(pDst_pixels_BGRA[15],
+                                               subblock_colors1[rg_etc1_to_selector_index[val15]]);
+         }
+      }
+      else
+      {
+       if (flip_flag)
+         {
+            pDst_pixels_BGRA[0] = subblock_colors0[rg_etc1_to_selector_index[val0]];
+            pDst_pixels_BGRA[1] = subblock_colors0[rg_etc1_to_selector_index[val1]];
+            pDst_pixels_BGRA[2] = subblock_colors0[rg_etc1_to_selector_index[val2]];
+            pDst_pixels_BGRA[3] = subblock_colors0[rg_etc1_to_selector_index[val3]];
+            pDst_pixels_BGRA[4] = subblock_colors0[rg_etc1_to_selector_index[val4]];
+            pDst_pixels_BGRA[5] = subblock_colors0[rg_etc1_to_selector_index[val5]];
+            pDst_pixels_BGRA[6] = subblock_colors0[rg_etc1_to_selector_index[val6]];
+            pDst_pixels_BGRA[7] = subblock_colors0[rg_etc1_to_selector_index[val7]];
+            pDst_pixels_BGRA[8] = subblock_colors1[rg_etc1_to_selector_index[val8]];
+            pDst_pixels_BGRA[9] = subblock_colors1[rg_etc1_to_selector_index[val9]];
+            pDst_pixels_BGRA[10] = subblock_colors1[rg_etc1_to_selector_index[val10]];
+            pDst_pixels_BGRA[11] = subblock_colors1[rg_etc1_to_selector_index[val11]];
+            pDst_pixels_BGRA[12] = subblock_colors1[rg_etc1_to_selector_index[val12]];
+            pDst_pixels_BGRA[13] = subblock_colors1[rg_etc1_to_selector_index[val13]];
+            pDst_pixels_BGRA[14] = subblock_colors1[rg_etc1_to_selector_index[val14]];
+            pDst_pixels_BGRA[15] = subblock_colors1[rg_etc1_to_selector_index[val15]];
+         }
+       else
+         {
+            pDst_pixels_BGRA[0] = subblock_colors0[rg_etc1_to_selector_index[val0]];
+            pDst_pixels_BGRA[1] = subblock_colors0[rg_etc1_to_selector_index[val1]];
+            pDst_pixels_BGRA[2] = subblock_colors1[rg_etc1_to_selector_index[val2]];
+            pDst_pixels_BGRA[3] = subblock_colors1[rg_etc1_to_selector_index[val3]];
+            pDst_pixels_BGRA[4] = subblock_colors0[rg_etc1_to_selector_index[val4]];
+            pDst_pixels_BGRA[5] = subblock_colors0[rg_etc1_to_selector_index[val5]];
+            pDst_pixels_BGRA[6] = subblock_colors1[rg_etc1_to_selector_index[val6]];
+            pDst_pixels_BGRA[7] = subblock_colors1[rg_etc1_to_selector_index[val7]];
+            pDst_pixels_BGRA[8] = subblock_colors0[rg_etc1_to_selector_index[val8]];
+            pDst_pixels_BGRA[9] = subblock_colors0[rg_etc1_to_selector_index[val9]];
+            pDst_pixels_BGRA[10] = subblock_colors1[rg_etc1_to_selector_index[val10]];
+            pDst_pixels_BGRA[11] = subblock_colors1[rg_etc1_to_selector_index[val11]];
+            pDst_pixels_BGRA[12] = subblock_colors0[rg_etc1_to_selector_index[val12]];
+            pDst_pixels_BGRA[13] = subblock_colors0[rg_etc1_to_selector_index[val13]];
+            pDst_pixels_BGRA[14] = subblock_colors1[rg_etc1_to_selector_index[val14]];
+            pDst_pixels_BGRA[15] = subblock_colors1[rg_etc1_to_selector_index[val15]];
+         }
+      }
 
    return success;
 }
@@ -1763,7 +1847,7 @@ rg_etc1_optimizer_compute(rg_etc1_optimizer *optimizer)
       uint i;
       const uint8* pSelectors = optimizer->m_best_solution.m_selectors;
 
-      rg_etc1_solution_coordinates_block_colors_get(&optimizer->m_best_solution.m_coords, block_colors);
+      rg_etc1_solution_coordinates_block_colors_get(optimizer->m_best_solution.m_coords, block_colors);
       pSrc_pixels = optimizer->m_pParams->m_pSrc_pixels;
       for (i = 0; i < n; i++)
         actual_error += rg_etc1_color_quad_u8_rgb_squared_distance(pSrc_pixels[i], block_colors[pSelectors[i]]);

-- 


Reply via email to