Marc,

On 6/6/2011 9:20 AM, Marc-André Moreau wrote:
I read more about SSE, and then about NEON, which is the equivalent for ARM.

My first impression is damn, how could I not see this before? This thing looks very well suited not only for acceleration of RemoteFX decoding, but there's a chance that more GDI operations could be accelerated with it than the current implementation in xfreerdp. Color conversion also appears to be possible with it. If someone wants to work on something like this, let me know.

I started working on adding SSE/SSE2 decoding support to the RemoteFX library.

I think there are several questions that still need to be answered about how best to wire this up, but please review the attached .patch file to see what I have working so far. This .patch file is based on your recent changes in the awakecoding/FreeRDP branch.

As a starting place, I broke the YCbCr to RGB conversion code out of rfx_decode_rgb and into a separate function. I then added an SSE 'optimized' version of it. Also included is a file with the disassembly of the rfx_decode.o file that clearly shows the difference between the two functions.

One note: I had to use a ./configure CFLAGS="-O2 -msse2" command to get this code to compile (the -O2 isn't actually needed, but it cleans up the generated code). I think we would need to find a better way of handling this automatically. Maybe a --with-sse flag that can be passed to ./configure, with #ifdef lines around the SSE code? Help with how to set this up would be appreciated.

Then there are questions about structure. Should we break out SSE optimizations into their own files and/or libraries, or leave them alongside their non-SSE cousins?

Lastly, is there a good way to test if and how much better these optimizations actually are? I started messing around with gprof, sprof, and oprofile, but I can't seem to get debug info out of the libfreerdp-rfx static library. gprof works, but only records info on the xfreerdp application and not on static libraries. I can't seem to get sprof or oprofile working either. Maybe it is just the way I was using them, but is there a better/easier way to profile this library? Or... maybe we could set up a unit test with known RFX data that can be run through a number of iterations and then time it?

Any other thoughts?

-Steve



diff --git a/X11/xf_decode.c b/X11/xf_decode.c
index 092aced..acd40c8 100644
--- a/X11/xf_decode.c
+++ b/X11/xf_decode.c
@@ -87,6 +87,8 @@ xf_decode_frame(xfInfo * xfi, int x, int y, uint8 * bitmapData, uint32 bitmapDat
 			}
 			rfx_message_free(xfi->rfx_context, message);
 
+			XSetClipMask(xfi->display, xfi->gc, None);
+
 			break;
 
 		default:
diff --git a/include/freerdp/rfx.h b/include/freerdp/rfx.h
index a5a9ed8..346a26f 100644
--- a/include/freerdp/rfx.h
+++ b/include/freerdp/rfx.h
@@ -137,14 +137,20 @@ struct _RFX_CONTEXT
 
 	RFX_POOL* pool; /* memory pool */
 
-	uint32 y_buffer[4096]; /* 4096 = 64x64 */
-	uint32 cr_buffer[4096]; /* 4096 = 64x64 */
-	uint32 cb_buffer[4096]; /* 4096 = 64x64 */
-
+	uint32 y_r_mem[4096+4]; /* 4096 = 64x64 (+ 4x4 = 16 for mem align) */
+	uint32 cb_g_mem[4096+4]; /* 4096 = 64x64 (+ 4x4 = 16 for mem align) */
+	uint32 cr_b_mem[4096+4]; /* 4096 = 64x64 (+ 4x4 = 16 for mem align) */
+ 
+ 	uint32* y_r_buffer;
+	uint32* cb_g_buffer;
+	uint32* cr_b_buffer;
+ 
 	uint32 idwt_buffer_8[256]; /* sub-band width 8 */
 	uint32 idwt_buffer_16[1024]; /* sub-band width 16 */
 	uint32 idwt_buffer_32[4096]; /* sub-band width 32 */
 	uint32* idwt_buffers[5]; /* sub-band buffer array */
+	
+	void (* decode_YCbCr_to_RGB)(uint32 * y_r_buf, uint32 * cb_g_buf, uint32 * cr_b_buf);
 };
 typedef struct _RFX_CONTEXT RFX_CONTEXT;
 
diff --git a/libfreerdp-rfx/librfx.c b/libfreerdp-rfx/librfx.c
index a08cdfc..572f1e5 100644
--- a/libfreerdp-rfx/librfx.c
+++ b/libfreerdp-rfx/librfx.c
@@ -20,6 +20,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <inttypes.h>
 #include <freerdp/rfx.h>
 #include <freerdp/types/base.h>
 #include <freerdp/utils/stream.h>
@@ -39,10 +40,26 @@ rfx_context_new(void)
 
 	context->pool = rfx_pool_new();
 
+	// align buffers to 16 byte boundary (needed for SSE/SSE2 instructions)
+	context->y_r_buffer = (uint32 *)(((uintptr_t)context->y_r_mem + 16) & ~ 0x0F);
+	context->cb_g_buffer = (uint32 *)(((uintptr_t)context->cb_g_mem + 16) & ~ 0x0F);
+	context->cr_b_buffer = (uint32 *)(((uintptr_t)context->cr_b_mem + 16) & ~ 0x0F);
+
 	context->idwt_buffers[1] = (uint32*) context->idwt_buffer_8;
 	context->idwt_buffers[2] = (uint32*) context->idwt_buffer_16;
 	context->idwt_buffers[4] = (uint32*) context->idwt_buffer_32;
 
+
+
+	//blah
+	context->decode_YCbCr_to_RGB = rfx_decode_YCbCr_to_RGB;
+
+	// TODO: how to best tell if sse/sse2 is available and desired?
+	if (1)
+	{
+		context->decode_YCbCr_to_RGB = rfx_decode_YCbCr_to_RGB_sse;
+	}
+
 	return context;
 }
 
diff --git a/libfreerdp-rfx/rfx_decode.c b/libfreerdp-rfx/rfx_decode.c
index ff51f69..e61e126 100644
--- a/libfreerdp-rfx/rfx_decode.c
+++ b/libfreerdp-rfx/rfx_decode.c
@@ -24,11 +24,108 @@
 #include "rfx_differential.h"
 #include "rfx_quantization.h"
 #include "rfx_dwt.h"
-
 #include "rfx_decode.h"
 
+#include <inttypes.h>
+#include "xmmintrin.h"
+#include "emmintrin.h"
+
 #define MINMAX(_v,_l,_h) ((_v) < (_l) ? (_l) : ((_v) > (_h) ? (_h) : (_v)))
 
+/* TODO: move these sse helpers to a separate include file */
+
+static __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_between_ps (__m128 val, __m128 min, __m128 max)
+{
+	__m128 ret;
+	ret = _mm_max_ps(val, min);
+	return _mm_min_ps(ret, max);
+}
+
+static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtps_epi32_and_store (__m128i * loc, __m128 val)
+{
+	__m128i tmp;
+	tmp = _mm_cvtps_epi32(val);
+	_mm_stream_si128(loc, tmp);
+}
+
+void
+rfx_decode_YCbCr_to_RGB_sse(uint32 * y_r_buffer, uint32 * cb_g_buffer, uint32 * cr_b_buffer)
+{
+	__m128 y_add = _mm_set_ps1(128.0f);
+	__m128 r_cr_t = _mm_set_ps1(1.403f);
+	__m128 g_cb_t = _mm_set_ps1(-0.344f);
+	__m128 g_cr_t = _mm_set_ps1(-0.714f);
+	__m128 b_cb_t = _mm_set_ps1(1.77f);
+
+	__m128 min = _mm_set_ps1(0.0f);
+	__m128 max = _mm_set_ps1(255.0f);
+
+	__m128 y, cb, cr;
+	__m128 r, g, b, tmp;	
+
+	__m128i * y_r_buf = (__m128i*) y_r_buffer;
+	__m128i * cb_g_buf = (__m128i*) cb_g_buffer;
+	__m128i * cr_b_buf = (__m128i*) cr_b_buffer;
+
+	int i;
+	for (i = 0; i < (4096 / 4); i++)
+	{
+		y = _mm_cvtepi32_ps(*y_r_buf);
+		cb = _mm_cvtepi32_ps(*cb_g_buf);
+		cr = _mm_cvtepi32_ps(*cr_b_buf);
+
+		// y = y + 128
+		y = _mm_add_ps(y, y_add);
+
+		// r = between(y + (cr * 1.403), 0, 255)
+		r = _mm_mul_ps(cr, r_cr_t);
+		r = _mm_add_ps(r, y);
+		r = _mm_between_ps(r, min, max);
+		_mm_cvtps_epi32_and_store(y_r_buf, r);
+
+		// g = between(y + (cb * -0.344) + (cr * -0.714), 0, 255)
+		g = _mm_mul_ps(cb, g_cb_t);
+		tmp = _mm_mul_ps(cr, g_cr_t);
+		g = _mm_add_ps(g, tmp);
+		g = _mm_add_ps(g, y);
+		g = _mm_between_ps(g, min, max);
+		_mm_cvtps_epi32_and_store(cb_g_buf, g);
+
+		// b = between(y + (cb * 1.77), 0, 255)
+		b = _mm_mul_ps(cb, b_cb_t);
+		b = _mm_add_ps(b, y);
+		b = _mm_between_ps(b, min, max);
+		_mm_cvtps_epi32_and_store(cr_b_buf, b);
+
+		y_r_buf++;
+		cb_g_buf++;
+		cr_b_buf++;
+	}
+}
+
+void
+rfx_decode_YCbCr_to_RGB(uint32 * y_r_buf, uint32 * cb_g_buf, uint32 * cr_b_buf)
+{
+	int y, cb, cr;
+	int r, g, b;
+
+	int i;
+	for (i = 0; i < 4096; i++)
+	{
+		y = y_r_buf[i] + 128;
+		cb = cb_g_buf[i];
+		cr = cr_b_buf[i];
+		r = (y + cr + (cr >> 2) + (cr >> 3) + (cr >> 5));
+		y_r_buf[i] = MINMAX(r, 0, 255);
+		g = (y - ((cb >> 2) + (cb >> 4) + (cb >> 5)) - ((cr >> 1) + (cr >> 3) + (cr >> 4) + (cr >> 5)));
+		cb_g_buf[i] = MINMAX(g, 0, 255);
+		b = (y + cb + (cb >> 1) + (cb >> 2) + (cb >> 6));
+		cr_b_buf[i] = MINMAX(b, 0, 255);
+	}
+}
+
 static void
 rfx_decode_component(RFX_CONTEXT * context, const uint32 * quantization_values, int half,
 	const uint8 * data, int size, uint32 * buffer)
@@ -67,23 +164,17 @@ rfx_decode_rgb(RFX_CONTEXT * context,
 	int y, cb, cr;
 
 	dst = rgb_buffer;
-	rfx_decode_component(context, y_quants, 0, y_data, y_size, context->y_buffer);
-	rfx_decode_component(context, cb_quants, 0, cb_data, cb_size, context->cb_buffer);
-	rfx_decode_component(context, cr_quants, 0, cr_data, cr_size, context->cr_buffer);
+	rfx_decode_component(context, y_quants, 0, y_data, y_size, context->y_r_buffer);
+	rfx_decode_component(context, cb_quants, 0, cb_data, cb_size, context->cb_g_buffer);
+	rfx_decode_component(context, cr_quants, 0, cr_data, cr_size, context->cr_b_buffer);
+
+	context->decode_YCbCr_to_RGB(context->y_r_buffer, context->cb_g_buffer, context->cr_b_buffer);
 
 	for (i = 0; i < 4096; i++)
 	{
-		y = context->y_buffer[i] + 128;
-		cb = context->cb_buffer[i];
-		cr = context->cr_buffer[i];
-
-		r = (y + cr + (cr >> 2) + (cr >> 3) + (cr >> 5));
-		r = MINMAX(r, 0, 255);
-		g = (y - ((cb >> 2) + (cb >> 4) + (cb >> 5)) - ((cr >> 1) + (cr >> 3) + (cr >> 4) + (cr >> 5)));
-		g = MINMAX(g, 0, 255);
-		b = (y + cb + (cb >> 1) + (cb >> 2) + (cb >> 6));
-		b = MINMAX(b, 0, 255);
-
+		r = context->y_r_buffer[i];
+		g = context->cb_g_buffer[i];
+		b = context->cr_b_buffer[i];
 		switch (context->pixel_format)
 		{
 			case RFX_PIXEL_FORMAT_BGRA:
@@ -112,6 +203,5 @@ rfx_decode_rgb(RFX_CONTEXT * context,
 				break;
 		}
 	}
-
 	return rgb_buffer;
 }
diff --git a/libfreerdp-rfx/rfx_decode.h b/libfreerdp-rfx/rfx_decode.h
index afcc965..02585a2 100644
--- a/libfreerdp-rfx/rfx_decode.h
+++ b/libfreerdp-rfx/rfx_decode.h
@@ -22,6 +22,12 @@
 
 #include <freerdp/rfx.h>
 
+void
+rfx_decode_YCbCr_to_RGB_sse(uint32 * y_r_buf, uint32 * cb_g_buf, uint32 * cr_b_buf);
+
+void
+rfx_decode_YCbCr_to_RGB(uint32 * y_r_buf, uint32 * cb_g_buf, uint32 * cr_b_buf);
+
 unsigned char *
 rfx_decode_rgb(RFX_CONTEXT * context,
 	const uint8 * y_data, int y_size, const uint32 * y_quants,
libfreerdp-rfx/libfreerdp_rfx_la-rfx_decode.o:     file format elf32-i386


Disassembly of section .text:

00000000 <rfx_decode_YCbCr_to_RGB>:
   0:   55                      push   %ebp
   1:   31 c0                   xor    %eax,%eax
   3:   89 e5                   mov    %esp,%ebp
   5:   57                      push   %edi
   6:   56                      push   %esi
   7:   53                      push   %ebx
   8:   83 ec 10                sub    $0x10,%esp
   b:   90                      nop
   c:   8d 74 26 00             lea    0x0(%esi,%eiz,1),%esi
  10:   8b 55 08                mov    0x8(%ebp),%edx
  13:   8b 4d 0c                mov    0xc(%ebp),%ecx
  16:   8b 75 10                mov    0x10(%ebp),%esi
  19:   8b 1c 82                mov    (%edx,%eax,4),%ebx
  1c:   8b 14 81                mov    (%ecx,%eax,4),%edx
  1f:   8b 0c 86                mov    (%esi,%eax,4),%ecx
  22:   83 eb 80                sub    $0xffffff80,%ebx
  25:   89 cf                   mov    %ecx,%edi
  27:   89 ce                   mov    %ecx,%esi
  29:   c1 ff 03                sar    $0x3,%edi
  2c:   89 7d e4                mov    %edi,-0x1c(%ebp)
  2f:   89 cf                   mov    %ecx,%edi
  31:   c1 ff 02                sar    $0x2,%edi
  34:   8d 3c 3b                lea    (%ebx,%edi,1),%edi
  37:   01 cf                   add    %ecx,%edi
  39:   03 7d e4                add    -0x1c(%ebp),%edi
  3c:   c1 fe 05                sar    $0x5,%esi
  3f:   89 75 ec                mov    %esi,-0x14(%ebp)
  42:   89 7d f0                mov    %edi,-0x10(%ebp)
  45:   8b 75 f0                mov    -0x10(%ebp),%esi
  48:   31 ff                   xor    %edi,%edi
  4a:   03 75 ec                add    -0x14(%ebp),%esi
  4d:   89 75 f0                mov    %esi,-0x10(%ebp)
  50:   78 0d                   js     5f <rfx_decode_YCbCr_to_RGB+0x5f>
  52:   81 fe ff 00 00 00       cmp    $0xff,%esi
  58:   66 bf ff 00             mov    $0xff,%di
  5c:   0f 4e fe                cmovle %esi,%edi
  5f:   8b 75 08                mov    0x8(%ebp),%esi
  62:   89 3c 86                mov    %edi,(%esi,%eax,4)
  65:   89 de                   mov    %ebx,%esi
  67:   89 d7                   mov    %edx,%edi
  69:   2b 75 e4                sub    -0x1c(%ebp),%esi
  6c:   c1 ff 02                sar    $0x2,%edi
  6f:   89 7d e8                mov    %edi,-0x18(%ebp)
  72:   29 fe                   sub    %edi,%esi
  74:   89 cf                   mov    %ecx,%edi
  76:   d1 ff                   sar    %edi
  78:   29 fe                   sub    %edi,%esi
  7a:   89 d7                   mov    %edx,%edi
  7c:   c1 f9 04                sar    $0x4,%ecx
  7f:   29 ce                   sub    %ecx,%esi
  81:   89 d1                   mov    %edx,%ecx
  83:   c1 f9 05                sar    $0x5,%ecx
  86:   29 ce                   sub    %ecx,%esi
  88:   31 c9                   xor    %ecx,%ecx
  8a:   2b 75 ec                sub    -0x14(%ebp),%esi
  8d:   c1 ff 04                sar    $0x4,%edi
  90:   29 fe                   sub    %edi,%esi
  92:   78 0b                   js     9f <rfx_decode_YCbCr_to_RGB+0x9f>
  94:   81 fe ff 00 00 00       cmp    $0xff,%esi
  9a:   b1 ff                   mov    $0xff,%cl
  9c:   0f 4e ce                cmovle %esi,%ecx
  9f:   8b 75 0c                mov    0xc(%ebp),%esi
  a2:   03 5d e8                add    -0x18(%ebp),%ebx
  a5:   89 0c 86                mov    %ecx,(%esi,%eax,4)
  a8:   89 d1                   mov    %edx,%ecx
  aa:   01 d3                   add    %edx,%ebx
  ac:   d1 f9                   sar    %ecx
  ae:   01 cb                   add    %ecx,%ebx
  b0:   31 c9                   xor    %ecx,%ecx
  b2:   c1 fa 06                sar    $0x6,%edx
  b5:   01 da                   add    %ebx,%edx
  b7:   78 0b                   js     c4 <rfx_decode_YCbCr_to_RGB+0xc4>
  b9:   81 fa ff 00 00 00       cmp    $0xff,%edx
  bf:   b1 ff                   mov    $0xff,%cl
  c1:   0f 4e ca                cmovle %edx,%ecx
  c4:   8b 7d 10                mov    0x10(%ebp),%edi
  c7:   89 0c 87                mov    %ecx,(%edi,%eax,4)
  ca:   83 c0 01                add    $0x1,%eax
  cd:   3d 00 10 00 00          cmp    $0x1000,%eax
  d2:   0f 85 38 ff ff ff       jne    10 <rfx_decode_YCbCr_to_RGB+0x10>
  d8:   83 c4 10                add    $0x10,%esp
  db:   5b                      pop    %ebx
  dc:   5e                      pop    %esi
  dd:   5f                      pop    %edi
  de:   5d                      pop    %ebp
  df:   c3                      ret    

000000e0 <rfx_decode_YCbCr_to_RGB_sse>:
  e0:   55                      push   %ebp
  e1:   0f 57 db                xorps  %xmm3,%xmm3
  e4:   89 e5                   mov    %esp,%ebp
  e6:   8b 45 08                mov    0x8(%ebp),%eax
  e9:   8b 4d 0c                mov    0xc(%ebp),%ecx
  ec:   8b 55 10                mov    0x10(%ebp),%edx
  ef:   0f 28 3d 00 00 00 00    movaps 0x0,%xmm7
  f6:   53                      push   %ebx
  f7:   0f 28 35 10 00 00 00    movaps 0x10,%xmm6
  fe:   8d 98 00 40 00 00       lea    0x4000(%eax),%ebx
 104:   0f 28 15 50 00 00 00    movaps 0x50,%xmm2
 10b:   90                      nop
 10c:   8d 74 26 00             lea    0x0(%esi,%eiz,1),%esi
 110:   0f 5b 2a                cvtdq2ps (%edx),%xmm5
 113:   0f 28 c5                movaps %xmm5,%xmm0
 116:   0f 5b 08                cvtdq2ps (%eax),%xmm1
 119:   0f 58 cf                addps  %xmm7,%xmm1
 11c:   0f 5b 21                cvtdq2ps (%ecx),%xmm4
 11f:   0f 59 c6                mulps  %xmm6,%xmm0
 122:   0f 59 2d 30 00 00 00    mulps  0x30,%xmm5
 129:   0f 58 c1                addps  %xmm1,%xmm0
 12c:   0f 5f c3                maxps  %xmm3,%xmm0
 12f:   0f 5d c2                minps  %xmm2,%xmm0
 132:   66 0f 5b c0             cvtps2dq %xmm0,%xmm0
 136:   66 0f e7 00             movntdq %xmm0,(%eax)
 13a:   0f 28 05 20 00 00 00    movaps 0x20,%xmm0
 141:   83 c0 10                add    $0x10,%eax
 144:   0f 59 c4                mulps  %xmm4,%xmm0
 147:   0f 58 c5                addps  %xmm5,%xmm0
 14a:   0f 58 c1                addps  %xmm1,%xmm0
 14d:   0f 5f c3                maxps  %xmm3,%xmm0
 150:   0f 5d c2                minps  %xmm2,%xmm0
 153:   66 0f 5b c0             cvtps2dq %xmm0,%xmm0
 157:   66 0f e7 01             movntdq %xmm0,(%ecx)
 15b:   0f 28 05 40 00 00 00    movaps 0x40,%xmm0
 162:   83 c1 10                add    $0x10,%ecx
 165:   0f 59 c4                mulps  %xmm4,%xmm0
 168:   0f 58 c1                addps  %xmm1,%xmm0
 16b:   0f 5f c3                maxps  %xmm3,%xmm0
 16e:   0f 5d c2                minps  %xmm2,%xmm0
 171:   66 0f 5b c0             cvtps2dq %xmm0,%xmm0
 175:   66 0f e7 02             movntdq %xmm0,(%edx)
 179:   83 c2 10                add    $0x10,%edx
 17c:   39 d8                   cmp    %ebx,%eax
 17e:   75 90                   jne    110 <rfx_decode_YCbCr_to_RGB_sse+0x30>
 180:   5b                      pop    %ebx
 181:   5d                      pop    %ebp
 182:   c3                      ret    
 183:   8d b6 00 00 00 00       lea    0x0(%esi),%esi
 189:   8d bc 27 00 00 00 00    lea    0x0(%edi,%eiz,1),%edi

00000190 <T.68>:
 190:   55                      push   %ebp
 191:   89 e5                   mov    %esp,%ebp
 193:   57                      push   %edi
 194:   56                      push   %esi
 195:   89 d6                   mov    %edx,%esi
 197:   53                      push   %ebx
 198:   89 c3                   mov    %eax,%ebx
 19a:   83 ec 4c                sub    $0x4c,%esp
 19d:   8b 45 08                mov    0x8(%ebp),%eax
 1a0:   8b 7d 0c                mov    0xc(%ebp),%edi
 1a3:   89 4c 24 04             mov    %ecx,0x4(%esp)
 1a7:   c7 44 24 10 00 10 00    movl   $0x1000,0x10(%esp)
 1ae:   00 
 1af:   89 44 24 08             mov    %eax,0x8(%esp)
 1b3:   8b 43 08                mov    0x8(%ebx),%eax
 1b6:   89 7c 24 0c             mov    %edi,0xc(%esp)
 1ba:   89 04 24                mov    %eax,(%esp)
 1bd:   e8 fc ff ff ff          call   1be <T.68+0x2e>
 1c2:   8d 8f 00 3f 00 00       lea    0x3f00(%edi),%ecx
 1c8:   89 0c 24                mov    %ecx,(%esp)
 1cb:   89 4d e4                mov    %ecx,-0x1c(%ebp)
 1ce:   c7 44 24 04 40 00 00    movl   $0x40,0x4(%esp)
 1d5:   00 
 1d6:   e8 fc ff ff ff          call   1d7 <T.68+0x47>
 1db:   8b 46 20                mov    0x20(%esi),%eax
 1de:   89 3c 24                mov    %edi,(%esp)
 1e1:   c7 44 24 04 00 04 00    movl   $0x400,0x4(%esp)
 1e8:   00 
 1e9:   89 44 24 08             mov    %eax,0x8(%esp)
 1ed:   e8 fc ff ff ff          call   1ee <T.68+0x5e>
 1f2:   8b 46 1c                mov    0x1c(%esi),%eax
 1f5:   c7 44 24 04 00 04 00    movl   $0x400,0x4(%esp)
 1fc:   00 
 1fd:   89 44 24 08             mov    %eax,0x8(%esp)
 201:   8d 87 00 10 00 00       lea    0x1000(%edi),%eax
 207:   89 04 24                mov    %eax,(%esp)
 20a:   e8 fc ff ff ff          call   20b <T.68+0x7b>
 20f:   8b 46 24                mov    0x24(%esi),%eax
 212:   c7 44 24 04 00 04 00    movl   $0x400,0x4(%esp)
 219:   00 
 21a:   89 44 24 08             mov    %eax,0x8(%esp)
 21e:   8d 87 00 20 00 00       lea    0x2000(%edi),%eax
 224:   89 04 24                mov    %eax,(%esp)
 227:   e8 fc ff ff ff          call   228 <T.68+0x98>
 22c:   8b 46 14                mov    0x14(%esi),%eax
 22f:   c7 44 24 04 00 01 00    movl   $0x100,0x4(%esp)
 236:   00 
 237:   89 44 24 08             mov    %eax,0x8(%esp)
 23b:   8d 87 00 30 00 00       lea    0x3000(%edi),%eax
 241:   89 45 d4                mov    %eax,-0x2c(%ebp)
 244:   89 04 24                mov    %eax,(%esp)
 247:   e8 fc ff ff ff          call   248 <T.68+0xb8>
 24c:   8b 56 10                mov    0x10(%esi),%edx
 24f:   c7 44 24 04 00 01 00    movl   $0x100,0x4(%esp)
 256:   00 
 257:   89 54 24 08             mov    %edx,0x8(%esp)
 25b:   8d 97 00 34 00 00       lea    0x3400(%edi),%edx
 261:   89 14 24                mov    %edx,(%esp)
 264:   e8 fc ff ff ff          call   265 <T.68+0xd5>
 269:   8b 56 18                mov    0x18(%esi),%edx
 26c:   c7 44 24 04 00 01 00    movl   $0x100,0x4(%esp)
 273:   00 
 274:   89 54 24 08             mov    %edx,0x8(%esp)
 278:   8d 97 00 38 00 00       lea    0x3800(%edi),%edx
 27e:   89 14 24                mov    %edx,(%esp)
 281:   e8 fc ff ff ff          call   282 <T.68+0xf2>
 286:   8b 56 08                mov    0x8(%esi),%edx
 289:   c7 44 24 04 40 00 00    movl   $0x40,0x4(%esp)
 290:   00 
 291:   89 54 24 08             mov    %edx,0x8(%esp)
 295:   8d 97 00 3c 00 00       lea    0x3c00(%edi),%edx
 29b:   89 14 24                mov    %edx,(%esp)
 29e:   89 55 e0                mov    %edx,-0x20(%ebp)
 2a1:   e8 fc ff ff ff          call   2a2 <T.68+0x112>
 2a6:   8b 46 04                mov    0x4(%esi),%eax
 2a9:   c7 44 24 04 40 00 00    movl   $0x40,0x4(%esp)
 2b0:   00 
 2b1:   89 44 24 08             mov    %eax,0x8(%esp)
 2b5:   8d 87 00 3d 00 00       lea    0x3d00(%edi),%eax
 2bb:   89 04 24                mov    %eax,(%esp)
 2be:   e8 fc ff ff ff          call   2bf <T.68+0x12f>
 2c3:   8b 46 0c                mov    0xc(%esi),%eax
 2c6:   c7 44 24 04 40 00 00    movl   $0x40,0x4(%esp)
 2cd:   00 
 2ce:   89 44 24 08             mov    %eax,0x8(%esp)
 2d2:   8d 87 70 3c 00 00       lea    0x3c70(%edi),%eax
 2d8:   89 04 24                mov    %eax,(%esp)
 2db:   e8 fc ff ff ff          call   2dc <T.68+0x14c>
 2e0:   8b 4d e4                mov    -0x1c(%ebp),%ecx
 2e3:   8b 36                   mov    (%esi),%esi
 2e5:   c7 44 24 04 40 00 00    movl   $0x40,0x4(%esp)
 2ec:   00 
 2ed:   89 0c 24                mov    %ecx,(%esp)
 2f0:   89 74 24 08             mov    %esi,0x8(%esp)
 2f4:   e8 fc ff ff ff          call   2f5 <T.68+0x165>
 2f9:   8b 55 e0                mov    -0x20(%ebp),%edx
 2fc:   89 1c 24                mov    %ebx,(%esp)
 2ff:   c7 44 24 08 08 00 00    movl   $0x8,0x8(%esp)
 306:   00 
 307:   89 54 24 04             mov    %edx,0x4(%esp)
 30b:   e8 fc ff ff ff          call   30c <T.68+0x17c>
 310:   8b 45 d4                mov    -0x2c(%ebp),%eax
 313:   89 1c 24                mov    %ebx,(%esp)
 316:   c7 44 24 08 10 00 00    movl   $0x10,0x8(%esp)
 31d:   00 
 31e:   89 44 24 04             mov    %eax,0x4(%esp)
 322:   e8 fc ff ff ff          call   323 <T.68+0x193>
 327:   89 7c 24 04             mov    %edi,0x4(%esp)
 32b:   89 1c 24                mov    %ebx,(%esp)
 32e:   c7 44 24 08 20 00 00    movl   $0x20,0x8(%esp)
 335:   00 
 336:   e8 fc ff ff ff          call   337 <T.68+0x1a7>
 33b:   83 c4 4c                add    $0x4c,%esp
 33e:   5b                      pop    %ebx
 33f:   5e                      pop    %esi
 340:   5f                      pop    %edi
 341:   5d                      pop    %ebp
 342:   c3                      ret    
 343:   8d b6 00 00 00 00       lea    0x0(%esi),%esi
 349:   8d bc 27 00 00 00 00    lea    0x0(%edi,%eiz,1),%edi

00000350 <rfx_decode_rgb>:
 350:   55                      push   %ebp
 351:   89 e5                   mov    %esp,%ebp
 353:   57                      push   %edi
 354:   56                      push   %esi
 355:   53                      push   %ebx
 356:   83 ec 2c                sub    $0x2c,%esp
 359:   8b 5d 08                mov    0x8(%ebp),%ebx
 35c:   8b 4d 0c                mov    0xc(%ebp),%ecx
 35f:   8b 55 14                mov    0x14(%ebp),%edx
 362:   8b 83 58 c0 00 00       mov    0xc058(%ebx),%eax
 368:   89 44 24 04             mov    %eax,0x4(%esp)
 36c:   8b 45 10                mov    0x10(%ebp),%eax
 36f:   89 04 24                mov    %eax,(%esp)
 372:   89 d8                   mov    %ebx,%eax
 374:   e8 17 fe ff ff          call   190 <T.68>
 379:   8b 83 5c c0 00 00       mov    0xc05c(%ebx),%eax
 37f:   8b 4d 18                mov    0x18(%ebp),%ecx
 382:   8b 55 20                mov    0x20(%ebp),%edx
 385:   89 44 24 04             mov    %eax,0x4(%esp)
 389:   8b 45 1c                mov    0x1c(%ebp),%eax
 38c:   89 04 24                mov    %eax,(%esp)
 38f:   89 d8                   mov    %ebx,%eax
 391:   e8 fa fd ff ff          call   190 <T.68>
 396:   8b 83 60 c0 00 00       mov    0xc060(%ebx),%eax
 39c:   8b 55 2c                mov    0x2c(%ebp),%edx
 39f:   8b 4d 24                mov    0x24(%ebp),%ecx
 3a2:   89 44 24 04             mov    %eax,0x4(%esp)
 3a6:   8b 45 28                mov    0x28(%ebp),%eax
 3a9:   89 04 24                mov    %eax,(%esp)
 3ac:   89 d8                   mov    %ebx,%eax
 3ae:   e8 dd fd ff ff          call   190 <T.68>
 3b3:   8b 83 60 c0 00 00       mov    0xc060(%ebx),%eax
 3b9:   89 44 24 08             mov    %eax,0x8(%esp)
 3bd:   8b 83 5c c0 00 00       mov    0xc05c(%ebx),%eax
 3c3:   89 44 24 04             mov    %eax,0x4(%esp)
 3c7:   8b 83 58 c0 00 00       mov    0xc058(%ebx),%eax
 3cd:   89 04 24                mov    %eax,(%esp)
 3d0:   ff 93 78 14 01 00       call   *0x11478(%ebx)
 3d6:   8b 55 30                mov    0x30(%ebp),%edx
 3d9:   31 c0                   xor    %eax,%eax
 3db:   eb 24                   jmp    401 <rfx_decode_rgb+0xb1>
 3dd:   8d 76 00                lea    0x0(%esi),%esi
 3e0:   0f b6 4d e4             movzbl -0x1c(%ebp),%ecx
 3e4:   c6 42 03 ff             movb   $0xff,0x3(%edx)
 3e8:   88 0a                   mov    %cl,(%edx)
 3ea:   89 f1                   mov    %esi,%ecx
 3ec:   88 4a 01                mov    %cl,0x1(%edx)
 3ef:   89 f9                   mov    %edi,%ecx
 3f1:   88 4a 02                mov    %cl,0x2(%edx)
 3f4:   83 c2 04                add    $0x4,%edx
 3f7:   83 c0 04                add    $0x4,%eax
 3fa:   3d 00 40 00 00          cmp    $0x4000,%eax
 3ff:   74 56                   je     457 <rfx_decode_rgb+0x107>
 401:   8b 8b 58 c0 00 00       mov    0xc058(%ebx),%ecx
 407:   8b 3c 01                mov    (%ecx,%eax,1),%edi
 40a:   8b 8b 5c c0 00 00       mov    0xc05c(%ebx),%ecx
 410:   8b 34 01                mov    (%ecx,%eax,1),%esi
 413:   8b 8b 60 c0 00 00       mov    0xc060(%ebx),%ecx
 419:   8b 0c 01                mov    (%ecx,%eax,1),%ecx
 41c:   89 4d e4                mov    %ecx,-0x1c(%ebp)
 41f:   8b 4b 18                mov    0x18(%ebx),%ecx
 422:   83 f9 01                cmp    $0x1,%ecx
 425:   74 59                   je     480 <rfx_decode_rgb+0x130>
 427:   72 b7                   jb     3e0 <rfx_decode_rgb+0x90>
 429:   83 f9 02                cmp    $0x2,%ecx
 42c:   74 3a                   je     468 <rfx_decode_rgb+0x118>
 42e:   83 f9 03                cmp    $0x3,%ecx
 431:   8d b4 26 00 00 00 00    lea    0x0(%esi,%eiz,1),%esi
 438:   75 bd                   jne    3f7 <rfx_decode_rgb+0xa7>
 43a:   89 f9                   mov    %edi,%ecx
 43c:   83 c0 04                add    $0x4,%eax
 43f:   88 0a                   mov    %cl,(%edx)
 441:   89 f1                   mov    %esi,%ecx
 443:   88 4a 01                mov    %cl,0x1(%edx)
 446:   0f b6 4d e4             movzbl -0x1c(%ebp),%ecx
 44a:   88 4a 02                mov    %cl,0x2(%edx)
 44d:   83 c2 03                add    $0x3,%edx
 450:   3d 00 40 00 00          cmp    $0x4000,%eax
 455:   75 aa                   jne    401 <rfx_decode_rgb+0xb1>
 457:   8b 45 30                mov    0x30(%ebp),%eax
 45a:   83 c4 2c                add    $0x2c,%esp
 45d:   5b                      pop    %ebx
 45e:   5e                      pop    %esi
 45f:   5f                      pop    %edi
 460:   5d                      pop    %ebp
 461:   c3                      ret    
 462:   8d b6 00 00 00 00       lea    0x0(%esi),%esi
 468:   0f b6 4d e4             movzbl -0x1c(%ebp),%ecx
 46c:   88 0a                   mov    %cl,(%edx)
 46e:   89 f1                   mov    %esi,%ecx
 470:   88 4a 01                mov    %cl,0x1(%edx)
 473:   89 f9                   mov    %edi,%ecx
 475:   88 4a 02                mov    %cl,0x2(%edx)
 478:   83 c2 03                add    $0x3,%edx
 47b:   e9 77 ff ff ff          jmp    3f7 <rfx_decode_rgb+0xa7>
 480:   89 f9                   mov    %edi,%ecx
 482:   88 0a                   mov    %cl,(%edx)
 484:   89 f1                   mov    %esi,%ecx
 486:   88 4a 01                mov    %cl,0x1(%edx)
 489:   0f b6 4d e4             movzbl -0x1c(%ebp),%ecx
 48d:   c6 42 03 ff             movb   $0xff,0x3(%edx)
 491:   88 4a 02                mov    %cl,0x2(%edx)
 494:   83 c2 04                add    $0x4,%edx
 497:   e9 5b ff ff ff          jmp    3f7 <rfx_decode_rgb+0xa7>

Disassembly of section .rodata.cst16:

00000000 <.rodata.cst16>:
   0:   00 00                   add    %al,(%eax)
   2:   00 43 00                add    %al,0x0(%ebx)
   5:   00 00                   add    %al,(%eax)
   7:   43                      inc    %ebx
   8:   00 00                   add    %al,(%eax)
   a:   00 43 00                add    %al,0x0(%ebx)
   d:   00 00                   add    %al,(%eax)
   f:   43                      inc    %ebx
  10:   81 95 b3 3f 81 95 b3    adcl   $0x95813fb3,-0x6a7ec04d(%ebp)
  17:   3f 81 95 
  1a:   b3 3f                   mov    $0x3f,%bl
  1c:   81 95 b3 3f c5 20 b0    adcl   $0x20c5beb0,0x20c53fb3(%ebp)
  23:   be c5 20 
  26:   b0 be                   mov    $0xbe,%al
  28:   c5 20                   lds    (%eax),%esp
  2a:   b0 be                   mov    $0xbe,%al
  2c:   c5 20                   lds    (%eax),%esp
  2e:   b0 be                   mov    $0xbe,%al
  30:   b4 c8                   mov    $0xc8,%ah
  32:   36                      ss
  33:   bf b4 c8 36 bf          mov    $0xbf36c8b4,%edi
  38:   b4 c8                   mov    $0xc8,%ah
  3a:   36                      ss
  3b:   bf b4 c8 36 bf          mov    $0xbf36c8b4,%edi
  40:   5c                      pop    %esp
  41:   8f                      (bad)  
  42:   e2 3f                   loop   83 <rfx_decode_YCbCr_to_RGB+0x83>
  44:   5c                      pop    %esp
  45:   8f                      (bad)  
  46:   e2 3f                   loop   87 <rfx_decode_YCbCr_to_RGB+0x87>
  48:   5c                      pop    %esp
  49:   8f                      (bad)  
  4a:   e2 3f                   loop   8b <rfx_decode_YCbCr_to_RGB+0x8b>
  4c:   5c                      pop    %esp
  4d:   8f                      (bad)  
  4e:   e2 3f                   loop   8f <rfx_decode_YCbCr_to_RGB+0x8f>
  50:   00 00                   add    %al,(%eax)
  52:   7f 43                   jg     97 <rfx_decode_YCbCr_to_RGB+0x97>
  54:   00 00                   add    %al,(%eax)
  56:   7f 43                   jg     9b <rfx_decode_YCbCr_to_RGB+0x9b>
  58:   00 00                   add    %al,(%eax)
  5a:   7f 43                   jg     9f <rfx_decode_YCbCr_to_RGB+0x9f>
  5c:   00 00                   add    %al,(%eax)
  5e:   7f 43                   jg     a3 <rfx_decode_YCbCr_to_RGB+0xa3>

Disassembly of section .comment:

00000000 <.comment>:
   0:   00 47 43                add    %al,0x43(%edi)
   3:   43                      inc    %ebx
   4:   3a 20                   cmp    (%eax),%ah
   6:   28 55 62                sub    %dl,0x62(%ebp)
   9:   75 6e                   jne    79 <rfx_decode_YCbCr_to_RGB+0x79>
   b:   74 75                   je     82 <rfx_decode_YCbCr_to_RGB+0x82>
   d:   20 34 2e                and    %dh,(%esi,%ebp,1)
  10:   34 2e                   xor    $0x2e,%al
  12:   33 2d 34 75 62 75       xor    0x75627534,%ebp
  18:   6e                      outsb  %ds:(%esi),(%dx)
  19:   74 75                   je     90 <rfx_decode_YCbCr_to_RGB+0x90>
  1b:   35 29 20 34 2e          xor    $0x2e342029,%eax
  20:   34 2e                   xor    $0x2e,%al
  22:   33 00                   xor    (%eax),%eax
------------------------------------------------------------------------------
EditLive Enterprise is the world's most technically advanced content
authoring tool. Experience the power of Track Changes, Inline Image
Editing and ensure content is compliant with Accessibility Checking.
http://p.sf.net/sfu/ephox-dev2dev
_______________________________________________
Freerdp-devel mailing list
Freerdp-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/freerdp-devel

Reply via email to