Marc, On 6/6/2011 9:20 AM, Marc-André Moreau wrote:
I read more about SSE, and then about NEON, which is the equivalent for ARM. My first impression is: damn, how could I not see this before? This thing looks very well suited not only for acceleration of RemoteFX decoding, but there's a chance that more GDI operations could be accelerated with it than the current implementation in xfreerdp. Color conversion also appears to be possible with it. If someone wants to work on something like this, let me know.
I started working on adding SSE/SSE2 decoding support to the RemoteFX library.
I think there are several questions that still need to be answered on how to best wire this up, but please review the attached .patch file to see what I have working so far. This .patch file is based off of your recent changes in the awakecoding/FreeRDP branch.
As a starting place, I broke the YCbCr to RGB conversion code out of rfx_decode_rgb and into a separate function. I then added an SSE 'optimized' version of it. Also included is a file with the disassembly of the rfx_decode.o file that clearly shows the difference between the two functions.
One note... I had to use a ./configure CFLAGS="-O2 -msse2" command to get this code to compile (the -O2 isn't actually needed, but cleans up the assembled code). I think we would need to find a better way of automatically handling this. Maybe a --with-sse flag that can be passed to ./configure with #ifdef lines around SSE code? Help with how to set this up would be appreciated.
Then there are questions about structure. Should we break out SSE optimizations into their own files and/or libraries, or leave them alongside their non-SSE cousins?
Lastly, is there a good way to test if and how much better these optimizations actually are? I started messing around with gprof, sprof, and oprofile, but I can't seem to get debug info out of the libfreerdp-rfx static library. gprof works, but only records info on the xfreerdp application and not on static libraries. I can't seem to get sprof or oprofile working either. Maybe it is just the way I was using them, but is there a better/easier way to profile this library? Or... maybe we could set up a unit test with known RFX data that can be run through a number of iterations and then time it?
Any other thoughts? -Steve
diff --git a/X11/xf_decode.c b/X11/xf_decode.c index 092aced..acd40c8 100644 --- a/X11/xf_decode.c +++ b/X11/xf_decode.c @@ -87,6 +87,8 @@ xf_decode_frame(xfInfo * xfi, int x, int y, uint8 * bitmapData, uint32 bitmapDat } rfx_message_free(xfi->rfx_context, message); + XSetClipMask(xfi->display, xfi->gc, None); + break; default: diff --git a/include/freerdp/rfx.h b/include/freerdp/rfx.h index a5a9ed8..346a26f 100644 --- a/include/freerdp/rfx.h +++ b/include/freerdp/rfx.h @@ -137,14 +137,20 @@ struct _RFX_CONTEXT RFX_POOL* pool; /* memory pool */ - uint32 y_buffer[4096]; /* 4096 = 64x64 */ - uint32 cr_buffer[4096]; /* 4096 = 64x64 */ - uint32 cb_buffer[4096]; /* 4096 = 64x64 */ - + uint32 y_r_mem[4096+4]; /* 4096 = 64x64 (+ 4x4 = 16 for mem align) */ + uint32 cb_g_mem[4096+4]; /* 4096 = 64x64 (+ 4x4 = 16 for mem align) */ + uint32 cr_b_mem[4096+4]; /* 4096 = 64x64 (+ 4x4 = 16 for mem align) */ + + uint32* y_r_buffer; + uint32* cb_g_buffer; + uint32* cr_b_buffer; + uint32 idwt_buffer_8[256]; /* sub-band width 8 */ uint32 idwt_buffer_16[1024]; /* sub-band width 16 */ uint32 idwt_buffer_32[4096]; /* sub-band width 32 */ uint32* idwt_buffers[5]; /* sub-band buffer array */ + + void (* decode_YCbCr_to_RGB)(uint32 * y_r_buf, uint32 * cb_g_buf, uint32 * cr_b_buf); }; typedef struct _RFX_CONTEXT RFX_CONTEXT; diff --git a/libfreerdp-rfx/librfx.c b/libfreerdp-rfx/librfx.c index a08cdfc..572f1e5 100644 --- a/libfreerdp-rfx/librfx.c +++ b/libfreerdp-rfx/librfx.c @@ -20,6 +20,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <inttypes.h> #include <freerdp/rfx.h> #include <freerdp/types/base.h> #include <freerdp/utils/stream.h> @@ -39,10 +40,26 @@ rfx_context_new(void) context->pool = rfx_pool_new(); + // align buffers to 16 byte boundary (needed for SSE/SSE2 instructions) + context->y_r_buffer = (uint32 *)(((uintptr_t)context->y_r_mem + 16) & ~ 0x0F); + context->cb_g_buffer = (uint32 *)(((uintptr_t)context->cb_g_mem + 16) & ~ 0x0F); + context->cr_b_buffer 
= (uint32 *)(((uintptr_t)context->cr_b_mem + 16) & ~ 0x0F); + context->idwt_buffers[1] = (uint32*) context->idwt_buffer_8; context->idwt_buffers[2] = (uint32*) context->idwt_buffer_16; context->idwt_buffers[4] = (uint32*) context->idwt_buffer_32; + + + //blah + context->decode_YCbCr_to_RGB = rfx_decode_YCbCr_to_RGB; + + // TODO: how to best tell if sse/sse2 is available and desired? + if (1) + { + context->decode_YCbCr_to_RGB = rfx_decode_YCbCr_to_RGB_sse; + } + return context; } diff --git a/libfreerdp-rfx/rfx_decode.c b/libfreerdp-rfx/rfx_decode.c index ff51f69..e61e126 100644 --- a/libfreerdp-rfx/rfx_decode.c +++ b/libfreerdp-rfx/rfx_decode.c @@ -24,11 +24,108 @@ #include "rfx_differential.h" #include "rfx_quantization.h" #include "rfx_dwt.h" - #include "rfx_decode.h" +#include <inttypes.h> +#include "xmmintrin.h" +#include "emmintrin.h" + #define MINMAX(_v,_l,_h) ((_v) < (_l) ? (_l) : ((_v) > (_h) ? (_h) : (_v))) +/* TODO: move these sse helpers to a seperate include file */ + +static __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_between_ps (__m128 val, __m128 min, __m128 max) +{ + __m128 ret; + ret = _mm_max_ps(val, min); + return _mm_min_ps(ret, max); +} + +static __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtps_epi32_and_store (__m128i * loc, __m128 val) +{ + __m128i tmp; + tmp = _mm_cvtps_epi32(val); + _mm_stream_si128(loc, tmp); +} + +void +rfx_decode_YCbCr_to_RGB_sse(uint32 * y_r_buffer, uint32 * cb_g_buffer, uint32 * cr_b_buffer) +{ + __m128 y_add = _mm_set_ps1(128.0f); + __m128 r_cr_t = _mm_set_ps1(1.403f); + __m128 g_cb_t = _mm_set_ps1(-0.344f); + __m128 g_cr_t = _mm_set_ps1(-0.714f); + __m128 b_cb_t = _mm_set_ps1(1.77f); + + __m128 min = _mm_set_ps1(0.0f); + __m128 max = _mm_set_ps1(255.0f); + + __m128 y, cb, cr; + __m128 r, g, b, tmp; + + __m128i * y_r_buf = (__m128i*) y_r_buffer; + __m128i * cb_g_buf = (__m128i*) cb_g_buffer; + __m128i * cr_b_buf = (__m128i*) 
cr_b_buffer; + + int i; + for (i = 0; i < (4096 / 4); i++) + { + y = _mm_cvtepi32_ps(*y_r_buf); + cb = _mm_cvtepi32_ps(*cb_g_buf); + cr = _mm_cvtepi32_ps(*cr_b_buf); + + // y = y + 128 + y = _mm_add_ps(y, y_add); + + // r = between(y + (cr * 1.403), 0, 255) + r = _mm_mul_ps(cr, r_cr_t); + r = _mm_add_ps(r, y); + r = _mm_between_ps(r, min, max); + _mm_cvtps_epi32_and_store(y_r_buf, r); + + // g = between(y + (cb * -0.344) + (cr * -0.714), 0, 255) + g = _mm_mul_ps(cb, g_cb_t); + tmp = _mm_mul_ps(cr, g_cr_t); + g = _mm_add_ps(g, tmp); + g = _mm_add_ps(g, y); + g = _mm_between_ps(g, min, max); + _mm_cvtps_epi32_and_store(cb_g_buf, g); + + // b = between(y + (cb * 1.77), 0, 255) + b = _mm_mul_ps(cb, b_cb_t); + b = _mm_add_ps(b, y); + b = _mm_between_ps(b, min, max); + _mm_cvtps_epi32_and_store(cr_b_buf, b); + + y_r_buf++; + cb_g_buf++; + cr_b_buf++; + } +} + +void +rfx_decode_YCbCr_to_RGB(uint32 * y_r_buf, uint32 * cb_g_buf, uint32 * cr_b_buf) +{ + int y, cb, cr; + int r, g, b; + + int i; + for (i = 0; i < 4096; i++) + { + y = y_r_buf[i] + 128; + cb = cb_g_buf[i]; + cr = cr_b_buf[i]; + r = (y + cr + (cr >> 2) + (cr >> 3) + (cr >> 5)); + y_r_buf[i] = MINMAX(r, 0, 255); + g = (y - ((cb >> 2) + (cb >> 4) + (cb >> 5)) - ((cr >> 1) + (cr >> 3) + (cr >> 4) + (cr >> 5))); + cb_g_buf[i] = MINMAX(g, 0, 255); + b = (y + cb + (cb >> 1) + (cb >> 2) + (cb >> 6)); + cr_b_buf[i] = MINMAX(b, 0, 255); + } +} + static void rfx_decode_component(RFX_CONTEXT * context, const uint32 * quantization_values, int half, const uint8 * data, int size, uint32 * buffer) @@ -67,23 +164,17 @@ rfx_decode_rgb(RFX_CONTEXT * context, int y, cb, cr; dst = rgb_buffer; - rfx_decode_component(context, y_quants, 0, y_data, y_size, context->y_buffer); - rfx_decode_component(context, cb_quants, 0, cb_data, cb_size, context->cb_buffer); - rfx_decode_component(context, cr_quants, 0, cr_data, cr_size, context->cr_buffer); + rfx_decode_component(context, y_quants, 0, y_data, y_size, context->y_r_buffer); + 
rfx_decode_component(context, cb_quants, 0, cb_data, cb_size, context->cb_g_buffer); + rfx_decode_component(context, cr_quants, 0, cr_data, cr_size, context->cr_b_buffer); + + context->decode_YCbCr_to_RGB(context->y_r_buffer, context->cb_g_buffer, context->cr_b_buffer); for (i = 0; i < 4096; i++) { - y = context->y_buffer[i] + 128; - cb = context->cb_buffer[i]; - cr = context->cr_buffer[i]; - - r = (y + cr + (cr >> 2) + (cr >> 3) + (cr >> 5)); - r = MINMAX(r, 0, 255); - g = (y - ((cb >> 2) + (cb >> 4) + (cb >> 5)) - ((cr >> 1) + (cr >> 3) + (cr >> 4) + (cr >> 5))); - g = MINMAX(g, 0, 255); - b = (y + cb + (cb >> 1) + (cb >> 2) + (cb >> 6)); - b = MINMAX(b, 0, 255); - + r = context->y_r_buffer[i]; + g = context->cb_g_buffer[i]; + b = context->cr_b_buffer[i]; switch (context->pixel_format) { case RFX_PIXEL_FORMAT_BGRA: @@ -112,6 +203,5 @@ rfx_decode_rgb(RFX_CONTEXT * context, break; } } - return rgb_buffer; } diff --git a/libfreerdp-rfx/rfx_decode.h b/libfreerdp-rfx/rfx_decode.h index afcc965..02585a2 100644 --- a/libfreerdp-rfx/rfx_decode.h +++ b/libfreerdp-rfx/rfx_decode.h @@ -22,6 +22,12 @@ #include <freerdp/rfx.h> +void +rfx_decode_YCbCr_to_RGB_sse(uint32 * y_r_buf, uint32 * cb_g_buf, uint32 * cr_b_buf); + +void +rfx_decode_YCbCr_to_RGB(uint32 * y_r_buf, uint32 * cb_g_buf, uint32 * cr_b_buf); + unsigned char * rfx_decode_rgb(RFX_CONTEXT * context, const uint8 * y_data, int y_size, const uint32 * y_quants,
libfreerdp-rfx/libfreerdp_rfx_la-rfx_decode.o: file format elf32-i386 Disassembly of section .text: 00000000 <rfx_decode_YCbCr_to_RGB>: 0: 55 push %ebp 1: 31 c0 xor %eax,%eax 3: 89 e5 mov %esp,%ebp 5: 57 push %edi 6: 56 push %esi 7: 53 push %ebx 8: 83 ec 10 sub $0x10,%esp b: 90 nop c: 8d 74 26 00 lea 0x0(%esi,%eiz,1),%esi 10: 8b 55 08 mov 0x8(%ebp),%edx 13: 8b 4d 0c mov 0xc(%ebp),%ecx 16: 8b 75 10 mov 0x10(%ebp),%esi 19: 8b 1c 82 mov (%edx,%eax,4),%ebx 1c: 8b 14 81 mov (%ecx,%eax,4),%edx 1f: 8b 0c 86 mov (%esi,%eax,4),%ecx 22: 83 eb 80 sub $0xffffff80,%ebx 25: 89 cf mov %ecx,%edi 27: 89 ce mov %ecx,%esi 29: c1 ff 03 sar $0x3,%edi 2c: 89 7d e4 mov %edi,-0x1c(%ebp) 2f: 89 cf mov %ecx,%edi 31: c1 ff 02 sar $0x2,%edi 34: 8d 3c 3b lea (%ebx,%edi,1),%edi 37: 01 cf add %ecx,%edi 39: 03 7d e4 add -0x1c(%ebp),%edi 3c: c1 fe 05 sar $0x5,%esi 3f: 89 75 ec mov %esi,-0x14(%ebp) 42: 89 7d f0 mov %edi,-0x10(%ebp) 45: 8b 75 f0 mov -0x10(%ebp),%esi 48: 31 ff xor %edi,%edi 4a: 03 75 ec add -0x14(%ebp),%esi 4d: 89 75 f0 mov %esi,-0x10(%ebp) 50: 78 0d js 5f <rfx_decode_YCbCr_to_RGB+0x5f> 52: 81 fe ff 00 00 00 cmp $0xff,%esi 58: 66 bf ff 00 mov $0xff,%di 5c: 0f 4e fe cmovle %esi,%edi 5f: 8b 75 08 mov 0x8(%ebp),%esi 62: 89 3c 86 mov %edi,(%esi,%eax,4) 65: 89 de mov %ebx,%esi 67: 89 d7 mov %edx,%edi 69: 2b 75 e4 sub -0x1c(%ebp),%esi 6c: c1 ff 02 sar $0x2,%edi 6f: 89 7d e8 mov %edi,-0x18(%ebp) 72: 29 fe sub %edi,%esi 74: 89 cf mov %ecx,%edi 76: d1 ff sar %edi 78: 29 fe sub %edi,%esi 7a: 89 d7 mov %edx,%edi 7c: c1 f9 04 sar $0x4,%ecx 7f: 29 ce sub %ecx,%esi 81: 89 d1 mov %edx,%ecx 83: c1 f9 05 sar $0x5,%ecx 86: 29 ce sub %ecx,%esi 88: 31 c9 xor %ecx,%ecx 8a: 2b 75 ec sub -0x14(%ebp),%esi 8d: c1 ff 04 sar $0x4,%edi 90: 29 fe sub %edi,%esi 92: 78 0b js 9f <rfx_decode_YCbCr_to_RGB+0x9f> 94: 81 fe ff 00 00 00 cmp $0xff,%esi 9a: b1 ff mov $0xff,%cl 9c: 0f 4e ce cmovle %esi,%ecx 9f: 8b 75 0c mov 0xc(%ebp),%esi a2: 03 5d e8 add -0x18(%ebp),%ebx a5: 89 0c 86 mov %ecx,(%esi,%eax,4) a8: 89 d1 mov 
%edx,%ecx aa: 01 d3 add %edx,%ebx ac: d1 f9 sar %ecx ae: 01 cb add %ecx,%ebx b0: 31 c9 xor %ecx,%ecx b2: c1 fa 06 sar $0x6,%edx b5: 01 da add %ebx,%edx b7: 78 0b js c4 <rfx_decode_YCbCr_to_RGB+0xc4> b9: 81 fa ff 00 00 00 cmp $0xff,%edx bf: b1 ff mov $0xff,%cl c1: 0f 4e ca cmovle %edx,%ecx c4: 8b 7d 10 mov 0x10(%ebp),%edi c7: 89 0c 87 mov %ecx,(%edi,%eax,4) ca: 83 c0 01 add $0x1,%eax cd: 3d 00 10 00 00 cmp $0x1000,%eax d2: 0f 85 38 ff ff ff jne 10 <rfx_decode_YCbCr_to_RGB+0x10> d8: 83 c4 10 add $0x10,%esp db: 5b pop %ebx dc: 5e pop %esi dd: 5f pop %edi de: 5d pop %ebp df: c3 ret 000000e0 <rfx_decode_YCbCr_to_RGB_sse>: e0: 55 push %ebp e1: 0f 57 db xorps %xmm3,%xmm3 e4: 89 e5 mov %esp,%ebp e6: 8b 45 08 mov 0x8(%ebp),%eax e9: 8b 4d 0c mov 0xc(%ebp),%ecx ec: 8b 55 10 mov 0x10(%ebp),%edx ef: 0f 28 3d 00 00 00 00 movaps 0x0,%xmm7 f6: 53 push %ebx f7: 0f 28 35 10 00 00 00 movaps 0x10,%xmm6 fe: 8d 98 00 40 00 00 lea 0x4000(%eax),%ebx 104: 0f 28 15 50 00 00 00 movaps 0x50,%xmm2 10b: 90 nop 10c: 8d 74 26 00 lea 0x0(%esi,%eiz,1),%esi 110: 0f 5b 2a cvtdq2ps (%edx),%xmm5 113: 0f 28 c5 movaps %xmm5,%xmm0 116: 0f 5b 08 cvtdq2ps (%eax),%xmm1 119: 0f 58 cf addps %xmm7,%xmm1 11c: 0f 5b 21 cvtdq2ps (%ecx),%xmm4 11f: 0f 59 c6 mulps %xmm6,%xmm0 122: 0f 59 2d 30 00 00 00 mulps 0x30,%xmm5 129: 0f 58 c1 addps %xmm1,%xmm0 12c: 0f 5f c3 maxps %xmm3,%xmm0 12f: 0f 5d c2 minps %xmm2,%xmm0 132: 66 0f 5b c0 cvtps2dq %xmm0,%xmm0 136: 66 0f e7 00 movntdq %xmm0,(%eax) 13a: 0f 28 05 20 00 00 00 movaps 0x20,%xmm0 141: 83 c0 10 add $0x10,%eax 144: 0f 59 c4 mulps %xmm4,%xmm0 147: 0f 58 c5 addps %xmm5,%xmm0 14a: 0f 58 c1 addps %xmm1,%xmm0 14d: 0f 5f c3 maxps %xmm3,%xmm0 150: 0f 5d c2 minps %xmm2,%xmm0 153: 66 0f 5b c0 cvtps2dq %xmm0,%xmm0 157: 66 0f e7 01 movntdq %xmm0,(%ecx) 15b: 0f 28 05 40 00 00 00 movaps 0x40,%xmm0 162: 83 c1 10 add $0x10,%ecx 165: 0f 59 c4 mulps %xmm4,%xmm0 168: 0f 58 c1 addps %xmm1,%xmm0 16b: 0f 5f c3 maxps %xmm3,%xmm0 16e: 0f 5d c2 minps %xmm2,%xmm0 171: 66 0f 5b c0 cvtps2dq 
%xmm0,%xmm0 175: 66 0f e7 02 movntdq %xmm0,(%edx) 179: 83 c2 10 add $0x10,%edx 17c: 39 d8 cmp %ebx,%eax 17e: 75 90 jne 110 <rfx_decode_YCbCr_to_RGB_sse+0x30> 180: 5b pop %ebx 181: 5d pop %ebp 182: c3 ret 183: 8d b6 00 00 00 00 lea 0x0(%esi),%esi 189: 8d bc 27 00 00 00 00 lea 0x0(%edi,%eiz,1),%edi 00000190 <T.68>: 190: 55 push %ebp 191: 89 e5 mov %esp,%ebp 193: 57 push %edi 194: 56 push %esi 195: 89 d6 mov %edx,%esi 197: 53 push %ebx 198: 89 c3 mov %eax,%ebx 19a: 83 ec 4c sub $0x4c,%esp 19d: 8b 45 08 mov 0x8(%ebp),%eax 1a0: 8b 7d 0c mov 0xc(%ebp),%edi 1a3: 89 4c 24 04 mov %ecx,0x4(%esp) 1a7: c7 44 24 10 00 10 00 movl $0x1000,0x10(%esp) 1ae: 00 1af: 89 44 24 08 mov %eax,0x8(%esp) 1b3: 8b 43 08 mov 0x8(%ebx),%eax 1b6: 89 7c 24 0c mov %edi,0xc(%esp) 1ba: 89 04 24 mov %eax,(%esp) 1bd: e8 fc ff ff ff call 1be <T.68+0x2e> 1c2: 8d 8f 00 3f 00 00 lea 0x3f00(%edi),%ecx 1c8: 89 0c 24 mov %ecx,(%esp) 1cb: 89 4d e4 mov %ecx,-0x1c(%ebp) 1ce: c7 44 24 04 40 00 00 movl $0x40,0x4(%esp) 1d5: 00 1d6: e8 fc ff ff ff call 1d7 <T.68+0x47> 1db: 8b 46 20 mov 0x20(%esi),%eax 1de: 89 3c 24 mov %edi,(%esp) 1e1: c7 44 24 04 00 04 00 movl $0x400,0x4(%esp) 1e8: 00 1e9: 89 44 24 08 mov %eax,0x8(%esp) 1ed: e8 fc ff ff ff call 1ee <T.68+0x5e> 1f2: 8b 46 1c mov 0x1c(%esi),%eax 1f5: c7 44 24 04 00 04 00 movl $0x400,0x4(%esp) 1fc: 00 1fd: 89 44 24 08 mov %eax,0x8(%esp) 201: 8d 87 00 10 00 00 lea 0x1000(%edi),%eax 207: 89 04 24 mov %eax,(%esp) 20a: e8 fc ff ff ff call 20b <T.68+0x7b> 20f: 8b 46 24 mov 0x24(%esi),%eax 212: c7 44 24 04 00 04 00 movl $0x400,0x4(%esp) 219: 00 21a: 89 44 24 08 mov %eax,0x8(%esp) 21e: 8d 87 00 20 00 00 lea 0x2000(%edi),%eax 224: 89 04 24 mov %eax,(%esp) 227: e8 fc ff ff ff call 228 <T.68+0x98> 22c: 8b 46 14 mov 0x14(%esi),%eax 22f: c7 44 24 04 00 01 00 movl $0x100,0x4(%esp) 236: 00 237: 89 44 24 08 mov %eax,0x8(%esp) 23b: 8d 87 00 30 00 00 lea 0x3000(%edi),%eax 241: 89 45 d4 mov %eax,-0x2c(%ebp) 244: 89 04 24 mov %eax,(%esp) 247: e8 fc ff ff ff call 248 <T.68+0xb8> 24c: 8b 
56 10 mov 0x10(%esi),%edx 24f: c7 44 24 04 00 01 00 movl $0x100,0x4(%esp) 256: 00 257: 89 54 24 08 mov %edx,0x8(%esp) 25b: 8d 97 00 34 00 00 lea 0x3400(%edi),%edx 261: 89 14 24 mov %edx,(%esp) 264: e8 fc ff ff ff call 265 <T.68+0xd5> 269: 8b 56 18 mov 0x18(%esi),%edx 26c: c7 44 24 04 00 01 00 movl $0x100,0x4(%esp) 273: 00 274: 89 54 24 08 mov %edx,0x8(%esp) 278: 8d 97 00 38 00 00 lea 0x3800(%edi),%edx 27e: 89 14 24 mov %edx,(%esp) 281: e8 fc ff ff ff call 282 <T.68+0xf2> 286: 8b 56 08 mov 0x8(%esi),%edx 289: c7 44 24 04 40 00 00 movl $0x40,0x4(%esp) 290: 00 291: 89 54 24 08 mov %edx,0x8(%esp) 295: 8d 97 00 3c 00 00 lea 0x3c00(%edi),%edx 29b: 89 14 24 mov %edx,(%esp) 29e: 89 55 e0 mov %edx,-0x20(%ebp) 2a1: e8 fc ff ff ff call 2a2 <T.68+0x112> 2a6: 8b 46 04 mov 0x4(%esi),%eax 2a9: c7 44 24 04 40 00 00 movl $0x40,0x4(%esp) 2b0: 00 2b1: 89 44 24 08 mov %eax,0x8(%esp) 2b5: 8d 87 00 3d 00 00 lea 0x3d00(%edi),%eax 2bb: 89 04 24 mov %eax,(%esp) 2be: e8 fc ff ff ff call 2bf <T.68+0x12f> 2c3: 8b 46 0c mov 0xc(%esi),%eax 2c6: c7 44 24 04 40 00 00 movl $0x40,0x4(%esp) 2cd: 00 2ce: 89 44 24 08 mov %eax,0x8(%esp) 2d2: 8d 87 70 3c 00 00 lea 0x3c70(%edi),%eax 2d8: 89 04 24 mov %eax,(%esp) 2db: e8 fc ff ff ff call 2dc <T.68+0x14c> 2e0: 8b 4d e4 mov -0x1c(%ebp),%ecx 2e3: 8b 36 mov (%esi),%esi 2e5: c7 44 24 04 40 00 00 movl $0x40,0x4(%esp) 2ec: 00 2ed: 89 0c 24 mov %ecx,(%esp) 2f0: 89 74 24 08 mov %esi,0x8(%esp) 2f4: e8 fc ff ff ff call 2f5 <T.68+0x165> 2f9: 8b 55 e0 mov -0x20(%ebp),%edx 2fc: 89 1c 24 mov %ebx,(%esp) 2ff: c7 44 24 08 08 00 00 movl $0x8,0x8(%esp) 306: 00 307: 89 54 24 04 mov %edx,0x4(%esp) 30b: e8 fc ff ff ff call 30c <T.68+0x17c> 310: 8b 45 d4 mov -0x2c(%ebp),%eax 313: 89 1c 24 mov %ebx,(%esp) 316: c7 44 24 08 10 00 00 movl $0x10,0x8(%esp) 31d: 00 31e: 89 44 24 04 mov %eax,0x4(%esp) 322: e8 fc ff ff ff call 323 <T.68+0x193> 327: 89 7c 24 04 mov %edi,0x4(%esp) 32b: 89 1c 24 mov %ebx,(%esp) 32e: c7 44 24 08 20 00 00 movl $0x20,0x8(%esp) 335: 00 336: e8 fc ff ff ff call 
337 <T.68+0x1a7> 33b: 83 c4 4c add $0x4c,%esp 33e: 5b pop %ebx 33f: 5e pop %esi 340: 5f pop %edi 341: 5d pop %ebp 342: c3 ret 343: 8d b6 00 00 00 00 lea 0x0(%esi),%esi 349: 8d bc 27 00 00 00 00 lea 0x0(%edi,%eiz,1),%edi 00000350 <rfx_decode_rgb>: 350: 55 push %ebp 351: 89 e5 mov %esp,%ebp 353: 57 push %edi 354: 56 push %esi 355: 53 push %ebx 356: 83 ec 2c sub $0x2c,%esp 359: 8b 5d 08 mov 0x8(%ebp),%ebx 35c: 8b 4d 0c mov 0xc(%ebp),%ecx 35f: 8b 55 14 mov 0x14(%ebp),%edx 362: 8b 83 58 c0 00 00 mov 0xc058(%ebx),%eax 368: 89 44 24 04 mov %eax,0x4(%esp) 36c: 8b 45 10 mov 0x10(%ebp),%eax 36f: 89 04 24 mov %eax,(%esp) 372: 89 d8 mov %ebx,%eax 374: e8 17 fe ff ff call 190 <T.68> 379: 8b 83 5c c0 00 00 mov 0xc05c(%ebx),%eax 37f: 8b 4d 18 mov 0x18(%ebp),%ecx 382: 8b 55 20 mov 0x20(%ebp),%edx 385: 89 44 24 04 mov %eax,0x4(%esp) 389: 8b 45 1c mov 0x1c(%ebp),%eax 38c: 89 04 24 mov %eax,(%esp) 38f: 89 d8 mov %ebx,%eax 391: e8 fa fd ff ff call 190 <T.68> 396: 8b 83 60 c0 00 00 mov 0xc060(%ebx),%eax 39c: 8b 55 2c mov 0x2c(%ebp),%edx 39f: 8b 4d 24 mov 0x24(%ebp),%ecx 3a2: 89 44 24 04 mov %eax,0x4(%esp) 3a6: 8b 45 28 mov 0x28(%ebp),%eax 3a9: 89 04 24 mov %eax,(%esp) 3ac: 89 d8 mov %ebx,%eax 3ae: e8 dd fd ff ff call 190 <T.68> 3b3: 8b 83 60 c0 00 00 mov 0xc060(%ebx),%eax 3b9: 89 44 24 08 mov %eax,0x8(%esp) 3bd: 8b 83 5c c0 00 00 mov 0xc05c(%ebx),%eax 3c3: 89 44 24 04 mov %eax,0x4(%esp) 3c7: 8b 83 58 c0 00 00 mov 0xc058(%ebx),%eax 3cd: 89 04 24 mov %eax,(%esp) 3d0: ff 93 78 14 01 00 call *0x11478(%ebx) 3d6: 8b 55 30 mov 0x30(%ebp),%edx 3d9: 31 c0 xor %eax,%eax 3db: eb 24 jmp 401 <rfx_decode_rgb+0xb1> 3dd: 8d 76 00 lea 0x0(%esi),%esi 3e0: 0f b6 4d e4 movzbl -0x1c(%ebp),%ecx 3e4: c6 42 03 ff movb $0xff,0x3(%edx) 3e8: 88 0a mov %cl,(%edx) 3ea: 89 f1 mov %esi,%ecx 3ec: 88 4a 01 mov %cl,0x1(%edx) 3ef: 89 f9 mov %edi,%ecx 3f1: 88 4a 02 mov %cl,0x2(%edx) 3f4: 83 c2 04 add $0x4,%edx 3f7: 83 c0 04 add $0x4,%eax 3fa: 3d 00 40 00 00 cmp $0x4000,%eax 3ff: 74 56 je 457 <rfx_decode_rgb+0x107> 401: 
8b 8b 58 c0 00 00 mov 0xc058(%ebx),%ecx 407: 8b 3c 01 mov (%ecx,%eax,1),%edi 40a: 8b 8b 5c c0 00 00 mov 0xc05c(%ebx),%ecx 410: 8b 34 01 mov (%ecx,%eax,1),%esi 413: 8b 8b 60 c0 00 00 mov 0xc060(%ebx),%ecx 419: 8b 0c 01 mov (%ecx,%eax,1),%ecx 41c: 89 4d e4 mov %ecx,-0x1c(%ebp) 41f: 8b 4b 18 mov 0x18(%ebx),%ecx 422: 83 f9 01 cmp $0x1,%ecx 425: 74 59 je 480 <rfx_decode_rgb+0x130> 427: 72 b7 jb 3e0 <rfx_decode_rgb+0x90> 429: 83 f9 02 cmp $0x2,%ecx 42c: 74 3a je 468 <rfx_decode_rgb+0x118> 42e: 83 f9 03 cmp $0x3,%ecx 431: 8d b4 26 00 00 00 00 lea 0x0(%esi,%eiz,1),%esi 438: 75 bd jne 3f7 <rfx_decode_rgb+0xa7> 43a: 89 f9 mov %edi,%ecx 43c: 83 c0 04 add $0x4,%eax 43f: 88 0a mov %cl,(%edx) 441: 89 f1 mov %esi,%ecx 443: 88 4a 01 mov %cl,0x1(%edx) 446: 0f b6 4d e4 movzbl -0x1c(%ebp),%ecx 44a: 88 4a 02 mov %cl,0x2(%edx) 44d: 83 c2 03 add $0x3,%edx 450: 3d 00 40 00 00 cmp $0x4000,%eax 455: 75 aa jne 401 <rfx_decode_rgb+0xb1> 457: 8b 45 30 mov 0x30(%ebp),%eax 45a: 83 c4 2c add $0x2c,%esp 45d: 5b pop %ebx 45e: 5e pop %esi 45f: 5f pop %edi 460: 5d pop %ebp 461: c3 ret 462: 8d b6 00 00 00 00 lea 0x0(%esi),%esi 468: 0f b6 4d e4 movzbl -0x1c(%ebp),%ecx 46c: 88 0a mov %cl,(%edx) 46e: 89 f1 mov %esi,%ecx 470: 88 4a 01 mov %cl,0x1(%edx) 473: 89 f9 mov %edi,%ecx 475: 88 4a 02 mov %cl,0x2(%edx) 478: 83 c2 03 add $0x3,%edx 47b: e9 77 ff ff ff jmp 3f7 <rfx_decode_rgb+0xa7> 480: 89 f9 mov %edi,%ecx 482: 88 0a mov %cl,(%edx) 484: 89 f1 mov %esi,%ecx 486: 88 4a 01 mov %cl,0x1(%edx) 489: 0f b6 4d e4 movzbl -0x1c(%ebp),%ecx 48d: c6 42 03 ff movb $0xff,0x3(%edx) 491: 88 4a 02 mov %cl,0x2(%edx) 494: 83 c2 04 add $0x4,%edx 497: e9 5b ff ff ff jmp 3f7 <rfx_decode_rgb+0xa7> Disassembly of section .rodata.cst16: 00000000 <.rodata.cst16>: 0: 00 00 add %al,(%eax) 2: 00 43 00 add %al,0x0(%ebx) 5: 00 00 add %al,(%eax) 7: 43 inc %ebx 8: 00 00 add %al,(%eax) a: 00 43 00 add %al,0x0(%ebx) d: 00 00 add %al,(%eax) f: 43 inc %ebx 10: 81 95 b3 3f 81 95 b3 adcl $0x95813fb3,-0x6a7ec04d(%ebp) 17: 3f 81 95 1a: b3 3f 
mov $0x3f,%bl 1c: 81 95 b3 3f c5 20 b0 adcl $0x20c5beb0,0x20c53fb3(%ebp) 23: be c5 20 26: b0 be mov $0xbe,%al 28: c5 20 lds (%eax),%esp 2a: b0 be mov $0xbe,%al 2c: c5 20 lds (%eax),%esp 2e: b0 be mov $0xbe,%al 30: b4 c8 mov $0xc8,%ah 32: 36 ss 33: bf b4 c8 36 bf mov $0xbf36c8b4,%edi 38: b4 c8 mov $0xc8,%ah 3a: 36 ss 3b: bf b4 c8 36 bf mov $0xbf36c8b4,%edi 40: 5c pop %esp 41: 8f (bad) 42: e2 3f loop 83 <rfx_decode_YCbCr_to_RGB+0x83> 44: 5c pop %esp 45: 8f (bad) 46: e2 3f loop 87 <rfx_decode_YCbCr_to_RGB+0x87> 48: 5c pop %esp 49: 8f (bad) 4a: e2 3f loop 8b <rfx_decode_YCbCr_to_RGB+0x8b> 4c: 5c pop %esp 4d: 8f (bad) 4e: e2 3f loop 8f <rfx_decode_YCbCr_to_RGB+0x8f> 50: 00 00 add %al,(%eax) 52: 7f 43 jg 97 <rfx_decode_YCbCr_to_RGB+0x97> 54: 00 00 add %al,(%eax) 56: 7f 43 jg 9b <rfx_decode_YCbCr_to_RGB+0x9b> 58: 00 00 add %al,(%eax) 5a: 7f 43 jg 9f <rfx_decode_YCbCr_to_RGB+0x9f> 5c: 00 00 add %al,(%eax) 5e: 7f 43 jg a3 <rfx_decode_YCbCr_to_RGB+0xa3> Disassembly of section .comment: 00000000 <.comment>: 0: 00 47 43 add %al,0x43(%edi) 3: 43 inc %ebx 4: 3a 20 cmp (%eax),%ah 6: 28 55 62 sub %dl,0x62(%ebp) 9: 75 6e jne 79 <rfx_decode_YCbCr_to_RGB+0x79> b: 74 75 je 82 <rfx_decode_YCbCr_to_RGB+0x82> d: 20 34 2e and %dh,(%esi,%ebp,1) 10: 34 2e xor $0x2e,%al 12: 33 2d 34 75 62 75 xor 0x75627534,%ebp 18: 6e outsb %ds:(%esi),(%dx) 19: 74 75 je 90 <rfx_decode_YCbCr_to_RGB+0x90> 1b: 35 29 20 34 2e xor $0x2e342029,%eax 20: 34 2e xor $0x2e,%al 22: 33 00 xor (%eax),%eax
------------------------------------------------------------------------------ EditLive Enterprise is the world's most technically advanced content authoring tool. Experience the power of Track Changes, Inline Image Editing and ensure content is compliant with Accessibility Checking. http://p.sf.net/sfu/ephox-dev2dev
_______________________________________________ Freerdp-devel mailing list Freerdp-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/freerdp-devel