On Thu, 23 Sep 2010 at 14:20:44 +0400, Alexey I. Froloff wrote:
> Inline assembly doesn't make sense with modern compilers.

Point taken.

From be5494e5e6d7da0018c46f1aff0c11eaa4cfe998 Mon Sep 17 00:00:00 2001
From: Carlos R. Mafra <[email protected]>
Date: Thu, 23 Sep 2010 14:32:12 +0200
Subject: [PATCH] Remove assembly/optimized code

Signed-off-by: Carlos R. Mafra <[email protected]>
---
 WINGs/Extras/Makefile.am |    2 -
 WINGs/Makefile.am        |    2 -
 WPrefs.app/Makefile.am   |    2 -
 configure.ac             |   56 ----
 src/Makefile.am          |    3 -
 wrlib/Makefile.am        |    9 -
 wrlib/convert.c          |   61 -----
 wrlib/libwraster.map     |    4 -
 wrlib/x86_specific.c     |  629 ----------------------------------------------
 9 files changed, 0 insertions(+), 768 deletions(-)
 delete mode 100644 wrlib/x86_specific.c

diff --git a/WINGs/Extras/Makefile.am b/WINGs/Extras/Makefile.am
index ca984ea..ef2b9ef 100644
--- a/WINGs/Extras/Makefile.am
+++ b/WINGs/Extras/Makefile.am
@@ -20,8 +20,6 @@ libExtraWINGs_la_SOURCES =    \
        wtableview.h \
        wtabledelegates.h 
 
-AM_CFLAGS = @NOSTRICTALIASING@
-
 INCLUDES = -I$(top_srcdir)/wrlib -I$(top_srcdir)/WINGs \
        -DRESOURCE_PATH=\"$(datadir)/WINGs\" @HEADER_SEARCH_PATH@ -DDEBUG
 
diff --git a/WINGs/Makefile.am b/WINGs/Makefile.am
index 90770d0..a8294bf 100644
--- a/WINGs/Makefile.am
+++ b/WINGs/Makefile.am
@@ -84,8 +84,6 @@ libWUtil_la_SOURCES =         \
 
 AM_CPPFLAGS = -DLOCALEDIR=\"$(NLSDIR)\" -DRESOURCE_PATH=\"$(datadir)/WINGs\" -DDEBUG
 
-AM_CFLAGS = @NOSTRICTALIASING@
-
 INCLUDES = -I$(top_srcdir)/WINGs/WINGs -I$(top_srcdir)/wrlib -I$(top_srcdir)/src \
        @XFTFLAGS@ @HEADER_SEARCH_PATH@
 
diff --git a/WPrefs.app/Makefile.am b/WPrefs.app/Makefile.am
index 393756b..497c426 100644
--- a/WPrefs.app/Makefile.am
+++ b/WPrefs.app/Makefile.am
@@ -44,8 +44,6 @@ WPrefs_SOURCES = \
 
 AM_CPPFLAGS = -DLOCALEDIR=\"$(NLSDIR)\" -DRESOURCE_PATH=\"$(wpdatadir)\"
 
-AM_CFLAGS = @NOSTRICTALIASING@
-
 INCLUDES = -I$(top_srcdir)/wrlib -I$(top_srcdir)/WINGs @HEADER_SEARCH_PATH@ 
 
 WPrefs_DEPENDENCIES = $(top_builddir)/WINGs/libWINGs.la
diff --git a/configure.ac b/configure.ac
index 35816e9..0a98942 100644
--- a/configure.ac
+++ b/configure.ac
@@ -218,60 +218,6 @@ AC_C_CONST
 AC_TYPE_SIGNAL
 
 
-
-dnl Compiler/architecture specific optimizations
-dnl ============================================
-
-
-dnl GCC/as with MMX support
-dnl -----------------------
-
-# until we fix it, leave it disabled
-asm_support=no
-mmx_support=no
-
-check_for_mmx_support=yes
-AC_ARG_ENABLE(mmx, AS_HELP_STRING([--disable-mmx], [disable compilation of MMX inline assembly]),
-   [if test x$enableval != xyes; then
-    check_for_mmx_support=no
-    fi])
-
-if test "$ac_cv_prog_gcc" = yes -a "$check_for_mmx_support" = yes; then
-case $host_cpu in
-*i?86*)
-
-    # gcc-3.3 or newer complains about some of our stuff without this
-    NOSTRICTALIASING="-fno-strict-aliasing"
-
-    AC_CACHE_CHECK(whether gcc supports x86 inline asm,
-                   ac_cv_c_inline_asm,
-                   [AC_TRY_LINK(,[{int x; asm volatile("movl %%eax, %%ebx\n\t pushal\n\t popal"::
-                          "m" (x),"m" (x),"m" (x),"m" (x),"m" (x),"m" (x),
-                          "m" (x),"m" (x),"m" (x),"m" (x),"m" (x),"m" (x));}],
-                   ac_cv_c_inline_asm=yes,
-                   ac_cv_c_inline_asm=no)])
-
-    if test "x$ac_cv_c_inline_asm" = xyes; then
-       AC_DEFINE(ASM_X86, 1, [define if processor is x86 (normally detected by configure)])
-       asm_support=yes
-
-       AC_CACHE_CHECK(whether gcc supports MMX(tm) inline asm,
-                      ac_cv_c_inline_mmx,
-                      [AC_TRY_LINK(,[asm ("movq %mm0, %mm1");],
-                      ac_cv_c_inline_mmx=yes,
-                      ac_cv_c_inline_mmx=no)])
-
-       if test "x$ac_cv_c_inline_mmx" = xyes; then
-          AC_DEFINE(ASM_X86_MMX, 1, [define if processor is x86 with MMX(tm) support (normally autodetected by configure)])
-          mmx_support=yes
-       fi
-    fi
-    ;;
-esac
-fi
-AC_SUBST(NOSTRICTALIASING)
-
-
 dnl pkg-config
 dnl ==========
 dnl AC_ARG_VAR(PKGCONFIG, [pkg-config command])
@@ -1045,8 +991,6 @@ echo "Installation path prefix            : $prefix"
 echo "Installation path for binaries      : $_bindir"
 echo "Installation path for WPrefs.app    : $wprefs_base_dir" | sed -e 's|\${prefix}|'"$prefix|"
 echo "Supported graphic format libraries  : $supported_gfx"
-echo "Use assembly routines for wrlib     : $asm_support"
-echo "Use inline MMX(tm) x86 assembly     : $mmx_support"
 echo "Antialiased text support in WINGs   : $xft"
 echo "Xinerama extension support          : $xinerama"
 echo "XRandR extension support            : $xrandr"
diff --git a/src/Makefile.am b/src/Makefile.am
index f883f77..512c193 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -105,9 +105,6 @@ EXTRA_wmaker_SOURCES = osdep_bsd.c osdep_darwin.c osdep_linux.c osdep_stub.c
 
 AM_CPPFLAGS = $(DFLAGS) -DLOCALEDIR=\"$(NLSDIR)\"
 
-AM_CFLAGS = @NOSTRICTALIASING@
-
-
 INCLUDES = \
        -I$(top_srcdir)/wrlib \
        -I$(top_srcdir)/WINGs @HEADER_SEARCH_PATH@ 
diff --git a/wrlib/Makefile.am b/wrlib/Makefile.am
index 32323f6..3cd7001 100644
--- a/wrlib/Makefile.am
+++ b/wrlib/Makefile.am
@@ -28,7 +28,6 @@ libwraster_la_SOURCES =       \
        xpixmap.c       \
        bench.h         \
        convert.c       \
-       x86_specific.c  \
        context.c       \
        misc.c          \
        scale.c         \
@@ -47,14 +46,6 @@ libwraster_la_SOURCES =      \
 LTCOMPILE2=`echo $(LTCOMPILE) | sed -e s/-fomit-frame-pointer//`
 COMPILE2=`echo $(COMPILE) | sed -e s/-fomit-frame-pointer//`
 
-# cant compile asm stuff with optimizations
-x86_specific.lo: x86_specific.c
-       $(LTCOMPILE2) -O0 -c $<
-
-x86_specific.o: x86_specific.c
-       $(COMPILE2) -O0 -c $<
-
-
 INCLUDES = $(DFLAGS) @HEADER_SEARCH_PATH@
 
 libwraster_la_LIBADD = @LIBRARY_SEARCH_PATH@ @GFXLIBS@ @XLIBS@ -lm
diff --git a/wrlib/convert.c b/wrlib/convert.c
index 139f946..a05e363 100644
--- a/wrlib/convert.c
+++ b/wrlib/convert.c
@@ -25,49 +25,19 @@
  */
 
 #include <config.h>
-
 #include <X11/Xlib.h>
 #include <X11/Xutil.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
-
 #include <assert.h>
 
 #include "wraster.h"
 
 #ifdef XSHM
 extern Pixmap R_CreateXImageMappedPixmap(RContext * context, RXImage * ximage);
-
 #endif
 
-#ifdef ASM_X86
-extern void x86_PseudoColor_32_to_8(unsigned char *image,
-                                   unsigned char *ximage,
-                                   char *err, char *nerr,
-                                   short *ctable,
-                                   int dr, int dg, int db,
-                                   unsigned long *pixels,
-                                   int cpc, int width, int height, int bytesPerPixel, int line_offset);
-#endif                         /* ASM_X86 */
-
-#ifdef ASM_X86_MMX
-
-extern int x86_check_mmx();
-
-extern void x86_mmx_TrueColor_32_to_16(unsigned char *image,
-                                      unsigned short *ximage,
-                                      short *err, short *nerr,
-                                      const unsigned short *rtable,
-                                      const unsigned short *gtable,
-                                      const unsigned short *btable,
-                                      int dr, int dg, int db,
-                                      unsigned int roffs,
-                                      unsigned int goffs,
-                                      unsigned int boffs, int width, int height, int line_offset);
-
-#endif                         /* ASM_X86_MMX */
-
 #define NFREE(n)  if (n) free(n)
 
 #define HAS_ALPHA(I)   ((I)->format == RRGBAFormat)
@@ -360,36 +330,6 @@ static RXImage *image2TrueColor(RContext * ctx, RImage * image)
                fputs("true color dither\n", stderr);
 #endif
 
-#ifdef ASM_X86_MMX
-               if (ctx->depth == 16 && HAS_ALPHA(image) && x86_check_mmx()) {
-                       short *err;
-                       short *nerr;
-
-                       err = malloc(8 * (image->width + 3));
-                       nerr = malloc(8 * (image->width + 3));
-                       if (!err || !nerr) {
-                               NFREE(err);
-                               NFREE(nerr);
-                               RErrorCode = RERR_NOMEMORY;
-                               RDestroyXImage(ctx, ximg);
-                               return NULL;
-                       }
-                       memset(err, 0, 8 * (image->width + 3));
-                       memset(nerr, 0, 8 * (image->width + 3));
-
-                       x86_mmx_TrueColor_32_to_16(image->data,
-                                                  (unsigned short *)ximg->image->data,
-                                                  err + 8, nerr + 8,
-                                                  rtable, gtable, btable,
-                                                  dr, dg, db,
-                                                  roffs, goffs, boffs,
-                                                  image->width, image->height,
-                                                  ximg->image->bytes_per_line - 2 * image->width);
-
-                       free(err);
-                       free(nerr);
-               } else
-#endif                         /* ASM_X86_MMX */
                {
                        signed char *err;
                        signed char *nerr;
@@ -575,7 +515,6 @@ static RXImage *image2PseudoColor(RContext * ctx, RImage * image)
                memset(err, 0, 4 * (image->width + 3));
                memset(nerr, 0, 4 * (image->width + 3));
 
-               /*#ifdef ASM_X86 */
                convertPseudoColor_to_8(ximg, image, err + 4, nerr + 4,
                                        rtable, gtable, btable, dr, dg, db, ctx->pixels, cpc);
 
diff --git a/wrlib/libwraster.map b/wrlib/libwraster.map
index 9d05f2d..6282e2c 100644
--- a/wrlib/libwraster.map
+++ b/wrlib/libwraster.map
@@ -93,9 +93,5 @@ LIBWRASTER3
 # RSaveXPM
 # _wraster_change_filter
 # WRasterLibVersion
-# x86_check_mmx
-# x86_mmx_TrueColor_24_to_16
-# x86_mmx_TrueColor_32_to_16
-# x86_PseudoColor_32_to_8
     *;
 };
diff --git a/wrlib/x86_specific.c b/wrlib/x86_specific.c
deleted file mode 100644
index 85b4b12..0000000
--- a/wrlib/x86_specific.c
+++ /dev/null
@@ -1,629 +0,0 @@
-/* x86_convert.c - convert RImage to XImage with x86 optimizations
- *
- * Raster graphics library
- *
- * Copyright (c) 2000-2003 Alfredo K. Kojima
- *
- *  This library is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU Library General Public
- *  License as published by the Free Software Foundation; either
- *  version 2 of the License, or (at your option) any later version.
- *
- *  This library is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  Library General Public License for more details.
- *
- *  You should have received a copy of the GNU Library General Public
- *  License along with this library; if not, write to the Free
- *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <config.h>
-
-#ifdef ASM_X86
-
-#ifdef ASM_X86_MMX
-
-int x86_check_mmx()
-{
-       static int result = -1;
-
-       if (result >= 0)
-               return result;
-
-       result = 0;
-
-       asm volatile (
-               "pushal                     \n\t"       /* please dont forget this in any asm */
-               "pushfl                     \n\t"       /* check whether cpuid supported */
-               "pop %%eax                  \n\t"
-               "movl %%eax, %%ebx          \n\t"
-               "xorl $(1<<21), %%eax       \n\t"
-               "pushl %%eax                \n\t"
-               "popfl                      \n\t"
-               "pushfl                     \n\t"
-               "popl %%eax                 \n\t"
-               "xorl %%ebx, %%eax          \n\t"
-               "andl $(1<<21), %%eax       \n\t"
-               "jz .NotPentium             \n\t"
-               "xorl %%eax, %%eax          \n\t"       /* no eax effect because of the movl below */
-                                                       /* except reseting flags. is it needed? */
-               "movl $1, %%eax             \n\t"
-               "cpuid                      \n\t"
-               "test $(1<<23), %%edx       \n\t"
-               "jz .NotMMX                 \n\t"
-               "popal                      \n\t"       /* popal needed because the address of */
-               "movl $1, %0                \n\t"       /* variable %0 may be kept in a register */
-               "jmp .noPop                 \n\t"
-               ".NotMMX:                   \n\t"
-               ".NotPentium:               \n\t"
-               "popal                      \n\t"
-               ".noPop:                    \n\t"
-               : "=m" (result)
-       );
-
-       return result;
-}
-
-/*
- * TODO:
- *              32/8    24/8    32/16   24/16   32/24   24/24
- * PPlain       YES     YES
- * MMX                          DONE
- *
- *
- * - try to align stack (local variable space) into quadword boundary
- */
-void
-x86_mmx_TrueColor_32_to_16(unsigned char *image,
-                          unsigned short *ximage,
-                          short *err,
-                          short *nerr,
-                          unsigned short *rtable,
-                          unsigned short *gtable,
-                          unsigned short *btable,
-                          int dr,
-                          int dg,
-                          int db,
-                          unsigned int roffs,
-                          unsigned int goffs, unsigned int boffs, int width, int height, int line_offset)
-{
-       union {
-               long long rrggbbaa;
-               struct {
-                       short int rr, gg, bb, aa;
-               } words;
-       } rrggbbaa;
-
-       union {
-               long long pixel;
-               struct {
-                       short int rr, gg, bb, aa;
-               } words;
-       } pixel;
-
-       short *tmp_err;
-       short *tmp_nerr;
-       int x;
-
-       asm volatile (
-               "pushl %%ebx                            \n\t"
-               /* pack dr, dg and db into mm6 */
-               "movl %7, %%eax                         \n\t"
-               "movl %8, %%ebx                         \n\t"
-               "movl %9, %%ecx                         \n\t"
-               "movw %%ax, %16                         \n\t"
-               "movw %%bx, %17                         \n\t"
-               "movw %%cx, %18                         \n\t"
-               "movw $0, %19                           \n\t"
-               "movq %16, %%mm6                        \n\t"   /* dr dg db 0 */
-               /* pack 4|4|4|4 into mm7, for shifting (/16) */
-               "movl $0x00040004, %16                  \n\t"
-               "movl $0x00040004, %18                  \n\t"
-               "movq %16, %%mm7                        \n\t"
-               /* store constant values for using with mmx when dithering */
-               "movl $0x00070007, %16                  \n\t"
-               "movl $0x00070007, %18                  \n\t"
-               "movq %16, %%mm5                        \n\t"
-               "movl $0x00050005, %16                  \n\t"
-               "movl $0x00050005, %18                  \n\t"
-               "movq %16, %%mm4                        \n\t"
-               "movl $0x00030003, %16                  \n\t"
-               "movl $0x00030003, %18                  \n\t"
-               "movq %16, %%mm3                        \n\t"
-               /* process 1 pixel / cycle, each component treated as 16bit */
-               "movl %0, %%esi                         \n\t"   /* esi = 
image->data */
-               ".LoopYa:                               \n\t"
-               "movl %13, %%eax                        \n\t"
-               "movl %%eax, %26                        \n\t"   /* x = width */
-               "movl %14, %%eax                        \n\t"
-               "decl %%eax                             \n\t"   /* y-- */
-               "movl %%eax, %14                        \n\t"
-               "js .Enda                               \n\t"   /* if y < 0, 
goto end */
-               "andl $1, %%eax                         \n\t"
-               "jz .LoopY_1a                           \n\t"   /* if (y & 1) 
goto LoopY_1 */
-               ".LoopY_0a:                             \n\t"
-               "movl %2, %%ebx                         \n\t"   /* ebx = err */
-               "movl %%ebx, %25                        \n\t"   /* [-36] = err 
*/
-               "movl %3, %%eax                         \n\t"
-               "movl %%eax, %24                        \n\t"   /* [-32] = nerr 
*/
-               "jmp .LoopXa                            \n\t"
-               ".LoopY_1a:                             \n\t"
-               "movl %3, %%ebx                         \n\t"   /* ebx = nerr */
-               "movl %%ebx, %25                        \n\t"   /* [-36] = nerr 
*/
-               "movl %2, %%eax                         \n\t"
-               "movl %%eax, %24                        \n\t"   /* [-32] = eerr 
*/
-               ".align 16                              \n\t"
-               ".LoopXa:                               \n\t"
-               /* calculate errors and pixel components; depend on ebx, esi, 
mm6 */
-               "movq (%%ebx), %%mm1                    \n\t"   /* mm1 = 
error[0..3] */
-               "punpcklbw (%%esi), %%mm0               \n\t"   /* mm0 = 
image->data[0..3] */
-               "psrlw $8, %%mm0                        \n\t"   /* fixup mm0 */
-               "paddusb %%mm1, %%mm0                   \n\t"   /* mm0 = mm0 + 
mm1 (sat. to 255) */
-               "movq %%mm0, %20                        \n\t"   /* save the 
pixel */
-               "movzwl %20, %%ecx                      \n\t"   /* ecx = 
pixel.red */
-               "movl %4, %%edi                         \n\t"   /* edi = rtable 
*/
-               /* agi */
-               "leal (%%edi, %%ecx, 2), %%eax          \n\t"   /* eax = 
&rtable[pixel.red] */
-               /* agi */
-               "movw (%%eax), %%dx                     \n\t"   /* dx = 
rtable[pixel.red] */
-               "movw %%dx, %16                         \n\t"   /* save rr */
-               "movzwl %21, %%ecx                      \n\t"   /* ecx = 
pixel.green */
-               "movl %5, %%edi                         \n\t"   /* edi = gtable 
*/
-               /* agi */
-               "leal (%%edi, %%ecx, 2), %%eax          \n\t"   /* eax = 
&gtable[pixel.green] */
-               /* agi */
-               "movw (%%eax), %%dx                     \n\t"   /* dx = 
gtable[pixel.green] */
-               "movw %%dx, %17                         \n\t"   /* save gg */
-               "movzwl %22, %%ecx                      \n\t"   /* ecx = 
pixel.blue */
-               "movl %6, %%edi                         \n\t"   /* ebx = btable 
*/
-               /* agi */
-               "leal (%%edi, %%ecx, 2), %%eax          \n\t"   /* eax = 
&btable[pixel.blue] */
-               /* agi */
-               "movw (%%eax), %%dx                     \n\t"   /* dx = 
btable[pixel.blue] */
-               "movw %%dx, %18                         \n\t"   /* save bb */
-               "movw $0, %19                           \n\t"   /* save dummy 
aa */
-               "movq %16, %%mm1                        \n\t"   /* load mm1 
with rrggbbaa */
-               "pmullw %%mm6, %%mm1                    \n\t"   /* mm1 = 
rr*dr|... */
-               "psubsw %%mm1, %%mm0                    \n\t"   /* error = 
pixel - mm1 */
-               /* distribute the error; depend on mm0, mm7, mm3, mm4, mm5 */
-               "movl %25, %%ebx                        \n\t"
-               "movq %%mm0, %%mm1                      \n\t"
-               "pmullw %%mm5, %%mm1                    \n\t"   /* mm1 = mm1*7 
*/
-               "psrlw %%mm7, %%mm1                     \n\t"   /* mm1 = mm1/16 
*/
-               "paddw 8(%%ebx), %%mm1                  \n\t"
-               "movq %%mm1, 8(%%ebx)                   \n\t"   /* err[x+1,y] = 
rer*7/16 */
-               "movl %24, %%ebx                        \n\t"
-               "movq %%mm0, %%mm1                      \n\t"
-               "pmullw %%mm4, %%mm1                    \n\t"   /* mm1 = mm1*5 
*/
-               "psrlw %%mm7, %%mm1                     \n\t"   /* mm1 = mm1/16 
*/
-               "paddw -8(%%ebx), %%mm1                 \n\t"
-               "movq %%mm1, -8(%%ebx)                  \n\t"   /* err[x-1,y+1] 
+= rer*3/16 */
-               "movq %%mm0, %%mm1                      \n\t"
-               "pmullw %%mm3, %%mm1                    \n\t"   /* mm1 = mm1*3 
*/
-               "psrlw %%mm7, %%mm1                     \n\t"   /* mm1 = mm1/16 
*/
-               "paddw 8(%%ebx), %%mm1                  \n\t"
-               "movq %%mm1, (%%ebx)                    \n\t"   /* err[x,y+1] 
+= rer*5/16 */
-               "psrlw %%mm7, %%mm0                     \n\t"   /* mm0 = mm0/16 
*/
-               "movq %%mm0, 8(%%ebx)                   \n\t"   /* err[x+1,y+1] 
= rer/16 */
-               /* calculate final pixel value and store */
-               "movl %10, %%ecx                        \n\t"
-               "movw %16, %%ax                         \n\t"
-               "shlw %%cl, %%ax                        \n\t"   /* NP* ax = 
r<<roffs */
-               "movl %11, %%ecx                        \n\t"
-               "movw %17, %%bx                         \n\t"
-               "shlw %%cl, %%bx                        \n\t"   /* NP* */
-               "orw %%bx, %%ax                         \n\t"
-                "movl %12, %%ecx                        \n\t"
-               "movw %18, %%bx                         \n\t"
-               "shlw %%cl, %%bx                        \n\t"   /* NP* */
-               "orw %%bx, %%ax                         \n\t"
-               "movl %1, %%edx                         \n\t"
-               "movw %%ax, (%%edx)                     \n\t"
-               "addl $2, %%edx                         \n\t"   /* increment 
ximage */
-               "movl %%edx, %1                         \n\t"
-               /*  prepare for next iteration on X */
-               "addl $8, %24                           \n\t"   /* nerr += 8 */
-               "movl %25, %%ebx                        \n\t"
-               "addl $8, %%ebx                         \n\t"
-               "movl %%ebx, %25                        \n\t"   /* ebx = err += 
8 */
-               /* Note: in the last pixel, this would cause an invalid memory 
access
-                * because, punpcklbw is used (which reads 8 bytes) and the last
-                * pixel is only 4 bytes. This is no problem because the image 
data
-                * was allocated with extra 4 bytes when created. */
-               "addl $4, %%esi                         \n\t"   /* image->data 
+= 4 */
-               "decl %26                               \n\t"   /* x-- */
-               "jnz .LoopXa                            \n\t"   /* if x>0, goto 
.LoopX */
-               /* depend on edx */
-               "addl %15, %%edx                        \n\t"   /* add extra 
offset to ximage */
-               "movl %%edx, %1                         \n\t"
-               "jmp .LoopYa                            \n\t"
-               ".Enda:                                 \n\t"   /* THE END */
-               "emms                                   \n\t"
-               "popl %%ebx                             \n\t"
-               :
-               : "m" (image),                                  /* %0 */
-                 "m" (ximage),                                 /* %1 */
-                 "m" (err),                                    /* %2 */
-                 "m" (nerr),                                   /* %3 */
-                 "m" (rtable),                                 /* %4 */
-                 "m" (gtable),                                 /* %5 */
-                 "m" (btable),                                 /* %6 */
-                 "m" (dr),                                     /* %7 */
-                 "m" (dg),                                     /* %8 */
-                 "m" (db),                                     /* %9 */
-                 "m" (roffs),                                  /* %10 */
-                 "m" (goffs),                                  /* %11 */
-                 "m" (boffs),                                  /* %12 */
-                 "m" (width),                                  /* %13 */
-                 "m" (height),                                 /* %14 */
-                 "m" (line_offset),                            /* %15 */
-                 "m" (rrggbbaa.words.rr),                      /* %16 (access 
to rr) */
-                 "m" (rrggbbaa.words.gg),                      /* %17 (access 
to gg) */
-                 "m" (rrggbbaa.words.bb),                      /* %18 (access 
to bb) */
-                 "m" (rrggbbaa.words.aa),                      /* %19 (access 
to aa) */
-                 "m" (pixel.words.rr),                         /* %20 (access 
to pixel.r) */
-                 "m" (pixel.words.gg),                         /* %21 (access 
to pixel.g) */
-                 "m" (pixel.words.bb),                         /* %22 (access 
to pixel.b) */
-                 "m" (pixel.words.aa),                         /* %23 (access 
to pixel.a) */
-                 "m" (tmp_err),                                /* %24 */
-                 "m" (tmp_nerr),                               /* %25 */
-                 "m" (x)                                       /* %26 */
-               : "eax",
-                 "ecx",
-                 "edx",
-                 "esi",
-                 "edi"
-       );
-}
-
-void
-x86_mmx_TrueColor_24_to_16(unsigned char *image,
-                          unsigned short *ximage,
-                          short *err,
-                          short *nerr,
-                          short *rtable,
-                          short *gtable,
-                          short *btable,
-                          int dr,
-                          int dg,
-                          int db,
-                          unsigned int roffs,
-                          unsigned int goffs, unsigned int boffs, int width, 
int height, int line_offset)
-{
-       union {
-               long long rrggbbaa;
-               struct {
-                       short int rr, gg, bb, aa;
-               } words;
-       } rrggbbaa;
-
-       union {
-               long long pixel;
-               struct {
-                       short int rr, gg, bb, aa;
-               } words;
-       } pixel;
-
-       short *tmp_err;
-       short *tmp_nerr;
-
-       int x;
-       int w1;
-       int w2;
-
-       asm volatile (
-               "pushl %%ebx                            \n\t"
-               "movl %13, %%eax                        \n\t"   /* eax = width 
*/
-               "movl %%eax, %%ebx                      \n\t"
-               "shrl $2, %%eax                         \n\t"
-               "movl %%eax, %27                        \n\t"   /* w1 = width / 
4 */
-               "andl $3, %%ebx                         \n\t"
-               "movl %%ebx, %28                        \n\t"   /* w2 = width 
%% 4 */
-               ".LoopYc:                               \n\t"
-               "movl %13, %%eax                        \n\t"
-               "movl %%eax, %26                        \n\t"   /* x = width */
-               "decl %14                               \n\t"   /* height-- */
-               "js .Endc                               \n\t"   /* if height < 
0 then end */
-               "movl %14, %%eax                        \n\t"
-               "decl %%eax                             \n\t"   /* y-- */
-               "movl %%eax, %14                        \n\t"
-               "js .Endc                               \n\t"   /* if y < 0, 
goto end */
-               "andl $1, %%eax                         \n\t"
-               "jz .LoopY_1c                           \n\t"   /* if (y&1) 
goto LoopY_1 */
-               ".LoopY_0c:                             \n\t"
-               "movl %2, %%ebx                         \n\t"   /* ebx = err */
-               "movl %%ebx, %25                        \n\t"   /* [-36] = err 
*/
-               "movl %3, %%eax                         \n\t"
-               "movl %%eax, %24                        \n\t"   /* [-32] = nerr 
*/
-               "jmp .LoopX_1c                          \n\t"
-               ".LoopY_1c:                             \n\t"
-               "movl %3, %%ebx                         \n\t"   /* ebx = nerr */
-               "movl %%ebx, %25                        \n\t"   /* [-36] = nerr 
*/
-               "movl %2, %%eax                         \n\t"
-               "movl %%eax, %24                        \n\t"   /* [-32] = eerr 
*/
-               ".align 16                              \n\t"
-               "movl %%eax, %26                        \n\t"   /* x = w1 */
-               ".LoopX_1c:                             \n\t"
-               "decl %26                               \n\t"   /* x-- */
-               "js .Xend1_c                            \n\t"   /* if x < 0 
then end */
-               /* do conversion of 4 pixels */
-               "movq %2, %%mm0                         \n\t"   /* mm0 = err */
-               "jmp .LoopX_1c                          \n\t"
-               ".Xend1_c:                              \n\t"
-               "movl %28, %%eax                        \n\t"
-               "movl %%eax, %26                        \n\t"   /* x = w2 */
-               ".LoopX_2c:                             \n\t"
-               "decl %26                               \n\t"   /* x-- */
-               "js .Xend2_c                            \n\t"
-               /* do conversion */
-               "jmp .LoopX_2c                          \n\t"
-               ".Xend2_c:                              \n\t"
-               "movl %27, %%eax                        \n\t"
-               "jmp .LoopYc                            \n\t"
-               ".Endc:                                 \n\t"   /* THE END */
-               "emms                                   \n\t"
-               "popl %%ebx                             \n\t"
-               :
-               : "m" (image),                                  /* %0 */
-                 "m" (ximage),                                 /* %1 */
-                 "m" (err),                                    /* %2 */
-                 "m" (nerr),                                   /* %3 */
-                 "m" (rtable),                                 /* %4 */
-                 "m" (gtable),                                 /* %5 */
-                 "m" (btable),                                 /* %6 */
-                 "m" (dr),                                     /* %7 */
-                 "m" (dg),                                     /* %8 */
-                 "m" (db),                                     /* %9 */
-                 "m" (roffs),                                  /* %10 */
-                 "m" (goffs),                                  /* %11 */
-                 "m" (boffs),                                  /* %12 */
-                 "m" (width),                                  /* %13 */
-                 "m" (height),                                 /* %14 */
-                 "m" (line_offset),                            /* %15 */
-                 "m" (rrggbbaa.words.rr),                      /* %16 (access 
to rr) */
-                 "m" (rrggbbaa.words.gg),                      /* %17 (access 
to gg) */
-                 "m" (rrggbbaa.words.bb),                      /* %18 (access 
to bb) */
-                 "m" (rrggbbaa.words.aa),                      /* %19 (access 
to aa) */
-                 "m" (pixel.words.rr),                         /* %20 (access 
to pixel.r) */
-                 "m" (pixel.words.gg),                         /* %21 (access 
to pixel.g) */
-                 "m" (pixel.words.bb),                         /* %22 (access 
to pixel.b) */
-                 "m" (pixel.words.aa),                         /* %23 (access 
to pixel.a) */
-                 "m" (tmp_err),                                /* %24 */
-                 "m" (tmp_nerr),                               /* %25 */
-                 "m" (x),                                      /* %26 */
-                 "m" (w1),                                     /* %27 */
-                 "m" (w2)                                      /* %28 */
-               :
-                 "eax",
-                 "ecx",
-                 "edx",
-                 "esi",
-                 "edi"
-       );
-}
-
-#endif /* ASM_X86_MMX */
-/*
- * x86_PseudoColor_32_to_8 - turn an image into 8-bit colormap pixels with
- * error diffusion, written as one GCC inline-asm block (AT&T syntax).
- * 32-bit x86 only: it uses pushal/popal and keeps pointers in 32-bit
- * registers.
- *
- * Per pixel, for each of the first three source bytes (red, green, blue):
- *   1. add the signed error byte stored for that channel,
- *   2. clamp the sum to 0..255,
- *   3. look the result up in ctable[] to get a colour-cube coordinate
- *      (rr, gg, bb),
- *   4. compute the residual  value - coordinate * (dr | dg | db)  and
- *      spread it with Floyd-Steinberg-style weights: 7/16 to the next
- *      pixel of the current row, 5/16 and 3/16 to the same and previous
- *      pixel of the next row, and 1/16 *stored* (not added) into the next
- *      pixel of the next row.
- * The output byte is the low byte of pixels[rr*cpc*cpc + gg*cpc + bb].
- *
- * Parameters:
- *   image          source pixels; bytes 0..2 of each pixel are read and
- *                  the pointer advances by bytesPerPixel per pixel
- *   ximage         destination; one byte is written per pixel and
- *                  line_offset is added after every row
- *   err, nerr      error rows, 4 bytes per pixel (3 used, signed); they
- *                  swap the "current row" / "next row" roles each row.
- *                  The code touches bytes from 4 before to 6 after the
- *                  entry being processed, so the buffers presumably carry
- *                  padding entries - TODO confirm against the caller
- *   ctable         table of shorts indexed by the clamped channel value
- *   dr, dg, db     per-channel multipliers; only the low byte is used
- *                  (byte-sized mul)
- *   pixels         table indexed with a 4-byte stride; only the low byte
- *                  of each entry is read
- *   cpc            multiplier for the colormap index; only the low byte
- *                  takes part in the byte-sized multiplies
- *   width, height  image size in pixels; height is used as the row counter.
- *                  There is no guard for width <= 0: the inner loop only
- *                  ends when its counter reaches exactly 0
- *   bytesPerPixel  distance between consecutive source pixels
- *   line_offset    extra bytes skipped in ximage at the end of each row
- *
- * NOTE(review): every operand is declared as an "m" *input*, yet the asm
- * stores to ximage, height, x, cpcpc, rr, gg, bb and tmp_err, and there is
- * no clobber list (no "memory", no "cc"); registers are protected only by
- * pushal/popal.  The labels are plain names rather than local ones, so a
- * second copy of this asm in the same object would presumably clash -
- * confirm before reusing.
- */
-void
-x86_PseudoColor_32_to_8(unsigned char *image,
-                       unsigned char *ximage,
-                       char *err,
-                       char *nerr,
-                       short *ctable,
-                       int dr,
-                       int dg,
-                       int db,
-                       unsigned long *pixels, int cpc, int width, int height, 
int bytesPerPixel, int line_offset)
-{
-       int x;                  /* %14: pixels left in the current row */
-       int cpcpc;              /* %15: cpc * cpc, computed by the asm */
-
-       int rr;                 /* %16: ctable[] result for red (only the low word is written) */
-       int gg;                 /* %17: ctable[] result for green (only the low word is written) */
-       int bb;                 /* %18: ctable[] result for blue (only the low word is written) */
-
-       char *tmp_err;          /* %19: walking pointer into the NEXT row's error buffer */
-       char *tmp_nerr;         /* %20: only named by commented-out instructions */
-
-       char ndr;               // apparently not used: passed as %21, never referenced by the asm
-       char ndg;               // apparently not used: passed as %22, never referenced by the asm
-       char ndb;               // apparently not used: passed as %23, never referenced by the asm
-
-       asm volatile (
-               "pushal                                 \n\t"   /* save all general registers; restored by popal at .Endb */
-               "movl %9, %%eax                         \n\t"   /* eax = cpc */
-               "mulb %9                                \n\t"   /* ax = al * (low byte of cpc) */
-               "movl %%eax, %15                        \n\t"   /* cpcpc = cpc * cpc */
-               /* eax will always be <= 0xffff (mulb only rewrites ax, so this
-                * relies on cpc itself fitting in 16 bits) */
-               /* process 1 pixel / cycle, each component treated as 16bit */
-               "movl %0, %%esi                         \n\t"   /* esi = image->data */
-               ".LoopYb:                               \n\t"   /* ---- start of a row ---- */
-               "movl %10, %%ecx                        \n\t"
-               "movl %%ecx, %14                        \n\t"   /* x = width */
-               "movl %11, %%ecx                        \n\t"
-               "decl %%ecx                             \n\t"   /* y-- (height doubles as the row counter) */
-               "movl %%ecx, %11                        \n\t"   /* movl leaves the flags from decl intact */
-               "js .Endb                               \n\t"   /* if y < 0, goto end */
-               "andl $1, %%ecx                         \n\t"
-               "jz .LoopY_1b                           \n\t"   /* if (y & 1) == 0 goto LoopY_1 */
-               ".LoopY_0b:                             \n\t"   /* y odd: current row = err, next row = nerr */
-               "movl %2, %%ebx                         \n\t"   /* ebx = err */
-               /* "movl %%ebx, %20                     \n\t" */ /* would store err in tmp_nerr */ /* useless: %20 is never read */
-               "movl %3, %%ecx                         \n\t"
-               "movl %%ecx, %19                        \n\t"   /* tmp_err = nerr (next-row pointer) */
-               "movl $0, (%%ecx)                       \n\t"   /* zero the 4 bytes of nerr entry 0 */
-               "jmp .LoopXb                            \n\t"
-               ".LoopY_1b:                             \n\t"   /* y even: current row = nerr, next row = err */
-               "movl %3, %%ebx                         \n\t"   /* ebx = nerr */
-               /* "movl %%ebx, %20                     \n\t" */ /* would store nerr in tmp_nerr */ /* useless: %20 is never read */
-               "movl %2, %%ecx                         \n\t"
-               "movl %%ecx, %19                        \n\t"   /* tmp_err = err (next-row pointer) */
-               "movl $0, (%%ecx)                       \n\t"   /* zero the 4 bytes of err entry 0 (err is the next row here) */
-               ".align 16                              \n\t"
-               ".LoopXb:                               \n\t"   /* ---- one pixel per iteration ---- */
-               "movl %4, %%edi                         \n\t"   /* edi = ctable */
-               "xorl %%edx, %%edx                      \n\t"   /* zero the upper word on edx (edx is used as an index below) */
-               /* RED; depends on ebx==current-row error, esi==image->data, edi==ctable */
-               "movzbw (%%esi), %%dx                   \n\t"   /* dx = image->data[0] */
-               "movsbw (%%ebx), %%ax                   \n\t"   /* ax = error[0] (sign-extended) */
-               "addw %%ax, %%dx                        \n\t"   /* pixel.red = data[0] + error[0] */
-               "testb %%dh, %%dh                       \n\t"   /* test if pixel.red < 0 or > 255 */
-               "jz .OKRb                               \n\t"   /* 0 <= pixel.red <= 255 */
-               "js .NEGRb                              \n\t"   /* pixel.red < 0 */
-               "movw $0xff, %%dx                       \n\t"   /* pixel.red > 255: clamp to 255 */
-               "jmp .OKRb                              \n\t"
-               ".NEGRb:                                \n\t"
-               "xorw %%dx, %%dx                        \n\t"   /* clamp to 0 */
-               ".OKRb:                                 \n\t"
-               /* partial reg */
-               "leal (%%edi, %%edx, 2), %%ecx          \n\t"   /* ecx = &ctable[pixel.red] (2-byte entries) */
-               /* agi */
-               "movl (%%ecx), %%eax                    \n\t"   /* ax = ctable[pixel.red]; 32-bit load, green/blue use a 16-bit movw */
-               "movw %%ax, %16                         \n\t"   /* save rr */
-               "mulb %5                                \n\t"   /* ax = rr*dr (al times the low byte of dr) */
-               "subw %%ax, %%dx                        \n\t"   /* rer = dx = dx - rr*dr */
-               "movswl %%dx, %%eax                     \n\t"   /* save rer (sign-extended) in eax */
-               /* distribute error */
-               "leal (, %%eax, 8), %%ecx               \n\t"   /* ecx = rer * 8 */
-               "subw %%dx, %%cx                        \n\t"   /* cx = rer * 7 */
-               "sarw $4, %%cx                          \n\t"   /* cx = rer * 7 / 16 */
-               "addb %%cl, 4(%%ebx)                    \n\t"   /* err[x+1] += rer * 7 / 16 */
-               "movl %19, %%ecx                        \n\t"   /* ecx = next-row error pointer */
-               "leaw (%%eax, %%eax, 4), %%dx           \n\t"   /* dx = rer * 5 */
-               "sarw $4, %%dx                          \n\t"   /* dx = rer * 5 / 16 */
-               "addb %%dl, (%%ecx)                     \n\t"   /* nerr[x] += rer * 5 / 16 */
-               "leaw (%%eax, %%eax, 2), %%dx           \n\t"   /* dx = rer * 3 */
-               "sarw $4, %%dx                          \n\t"   /* dx = rer * 3 / 16 */
-               "addb %%dl, -4(%%ecx)                   \n\t"   /* nerr[x-1] += rer * 3 / 16 */
-               "sarw $4, %%ax                          \n\t"   /* ax = rer / 16 */
-               "movb %%al, 4(%%ecx)                    \n\t"   /* nerr[x+1] = rer / 16 (a store, not an add) */
-               /* GREEN; depends on ebx, esi, edi */
-               "movzbw 1(%%esi), %%dx                  \n\t"   /* dx = image->data[1] */
-               "movsbw 1(%%ebx), %%ax                  \n\t"   /* ax = error[1] (sign-extended) */
-               "addw %%ax, %%dx                        \n\t"   /* pixel.grn = data[1] + error[1] */
-               "testb %%dh, %%dh                       \n\t"   /* test if pixel.grn < 0 or > 255 */
-               "jz .OKGb                               \n\t"   /* 0 <= pixel.grn <= 255 */
-               "js .NEGGb                              \n\t"   /* pixel.grn < 0 */
-               "movw $0xff, %%dx                       \n\t"   /* pixel.grn > 255: clamp to 255 */
-               "jmp .OKGb                              \n\t"
-               ".NEGGb:                                \n\t"
-               "xorw %%dx, %%dx                        \n\t"   /* clamp to 0 */
-               ".OKGb:                                 \n\t"
-               /* partial reg */
-               "leal (%%edi, %%edx, 2), %%ecx          \n\t"   /* ecx = &ctable[pixel.grn] */
-               /* agi */
-               "movw (%%ecx), %%ax                     \n\t"   /* ax = ctable[pixel.grn] */
-               "movw %%ax, %17                         \n\t"   /* save gg */
-               "mulb %6                                \n\t"   /* ax = gg*dg (al times the low byte of dg) */
-               "subw %%ax, %%dx                        \n\t"   /* ger = dx = dx - gg*dg */
-               "movswl %%dx, %%eax                     \n\t"   /* save ger (sign-extended) in eax */
-               /* distribute error */
-               "leal (, %%eax, 8), %%ecx               \n\t"   /* ecx = ger * 8 */
-               "subw %%dx, %%cx                        \n\t"   /* cx = ger * 7 */
-               "sarw $4, %%cx                          \n\t"   /* cx = ger * 7 / 16 */
-               "addb %%cl, 5(%%ebx)                    \n\t"   /* err[x+1] += ger * 7 / 16 */
-               "movl %19, %%ecx                        \n\t"   /* ecx = next-row error pointer */
-               "leaw (%%eax, %%eax, 4), %%dx           \n\t"   /* dx = ger * 5 */
-               "sarw $4, %%dx                          \n\t"   /* dx = ger * 5 / 16 */
-               "addb %%dl, 1(%%ecx)                    \n\t"   /* nerr[x] += ger * 5 / 16 */
-               "leaw (%%eax, %%eax, 2), %%dx           \n\t"   /* dx = ger * 3 */
-               "sarw $4, %%dx                          \n\t"   /* dx = ger * 3 / 16 */
-               "addb %%dl, -3(%%ecx)                   \n\t"   /* nerr[x-1] += ger * 3 / 16 */
-               "sarw $4, %%ax                          \n\t"   /* ax = ger / 16 */
-               "movb %%al, 5(%%ecx)                    \n\t"   /* nerr[x+1] = ger / 16 (a store, not an add) */
-               /* BLUE; depends on ebx, esi, edi */
-               "movzbw 2(%%esi), %%dx                  \n\t"   /* dx = image->data[2] */
-               "movsbw 2(%%ebx), %%ax                  \n\t"   /* ax = error[2] (sign-extended) */
-               "addw %%ax, %%dx                        \n\t"   /* pixel.blu = data[2] + error[2] */
-               "testb %%dh, %%dh                       \n\t"   /* test if pixel.blu < 0 or > 255 */
-               "jz .OKBb                               \n\t"   /* 0 <= pixel.blu <= 255 */
-               "js .NEGBb                              \n\t"   /* pixel.blu < 0 */
-               "movw $0xff, %%dx                       \n\t"   /* pixel.blu > 255: clamp to 255 */
-               "jmp .OKBb                              \n\t"
-               ".NEGBb:                                \n\t"
-               "xorw %%dx, %%dx                        \n\t"   /* clamp to 0 */
-               ".OKBb:                                 \n\t"
-               /* partial reg */
-               "leal (%%edi, %%edx, 2), %%ecx          \n\t"   /* ecx = &ctable[pixel.blu] */
-               /* agi */
-               "movw (%%ecx), %%ax                     \n\t"   /* ax = ctable[pixel.blu] */
-               "movw %%ax, %18                         \n\t"   /* save bb */
-               "mulb %7                                \n\t"   /* ax = bb*db (al times the low byte of db) */
-               "subw %%ax, %%dx                        \n\t"   /* ber = dx = dx - bb*db */
-               "movswl %%dx, %%eax                     \n\t"   /* save ber (sign-extended) in eax */
-               /* distribute error */
-               "leal (, %%eax, 8), %%ecx               \n\t"   /* ecx = ber * 8 */
-               "subw %%dx, %%cx                        \n\t"   /* cx = ber * 7 */
-               "sarw $4, %%cx                          \n\t"   /* cx = ber * 7 / 16 */
-               "addb %%cl, 6(%%ebx)                    \n\t"   /* err[x+1] += ber * 7 / 16 */
-               "movl %19, %%ecx                        \n\t"   /* ecx = next-row error pointer */
-               "leaw (%%eax, %%eax, 4), %%dx           \n\t"   /* dx = ber * 5 */
-               "sarw $4, %%dx                          \n\t"   /* dx = ber * 5 / 16 */
-               "addb %%dl, 2(%%ecx)                    \n\t"   /* nerr[x] += ber * 5 / 16 */
-               "leaw (%%eax, %%eax, 2), %%dx           \n\t"   /* dx = ber * 3 */
-               "sarw $4, %%dx                          \n\t"   /* dx = ber * 3 / 16 */
-               "addb %%dl, -4(%%ecx)                   \n\t"   /* nerr[x-1] += ber * 3 / 16; NOTE(review): -4 is the previous pixel's RED slot (red uses -4, green -3), -2 looks intended - confirm */
-               "sarw $4, %%ax                          \n\t"   /* ax = ber / 16 */
-               "movb %%al, 6(%%ecx)                    \n\t"   /* nerr[x+1] = ber / 16 (a store, not an add) */
-               "andl $0xffff, %%eax                    \n\t"   /* clear the upper half of eax: it is used as a table index below */
-               /* depends on eax & 0xffff0000 == 0
-                * calculate the index of the value of the pixel:
-                * index = cpcpc*rr + cpc*gg + bb, in 16-bit arithmetic; both
-                * multiplies are byte-sized (al times the low byte of the operand) */
-               "movw %16, %%ax                         \n\t"   /* ax = rr */
-               "mulb %15                               \n\t"   /* ax = cpcpc*rr */
-               "movw %%ax, %%cx                        \n\t"
-               "movw %17, %%ax                         \n\t"   /* ax = gg */
-               "mulb %9                                \n\t"   /* ax = cpc*gg */
-               "addw %%cx, %%ax                        \n\t"   /* ax = cpc*gg + cpcpc*rr */
-               "addw %18, %%ax                         \n\t"   /* ax = cpcpc*rr + cpc*gg + bb */
-               "movl %8, %%ecx                         \n\t"   /* ecx = pixels */
-               /* agi */
-               "leal (%%ecx, %%eax, 4), %%edx          \n\t"   /* edx = &pixels[ax] (4-byte entries) */
-               /* agi */
-               "movb (%%edx), %%cl                     \n\t"   /* cl = low byte of pixels[ax] */
-               /* store the pixel */
-               "movl %1, %%eax                         \n\t"   /* eax = ximage */
-               "movb %%cl, (%%eax)                     \n\t"   /* *ximage = cl */
-               "incl %1                                \n\t"   /* ximage++ (updates the parameter in memory) */
-               /* prepare for next iteration on X */
-               "addl $4, %19                           \n\t"   /* next-row error pointer += 4 */
-               "addl $4, %%ebx                         \n\t"   /* current-row error pointer += 4 */
-               "addl %12, %%esi                        \n\t"   /* image->data += bpp */
-               "decl %14                               \n\t"   /* x-- */
-               "jnz .LoopXb                            \n\t"   /* if x != 0, goto .LoopX */
-               "movl %13, %%eax                        \n\t"
-               "addl %%eax, %1                         \n\t"   /* add extra offset to ximage */
-               "jmp .LoopYb                            \n\t"
-               ".Endb:                                 \n\t"
-               "emms                                   \n\t"   /* NOTE(review): no MMX instruction appears in this asm block */
-               "popal                                  \n\t"   /* restore the registers saved by pushal */
-               :                                               /* no outputs declared; see the NOTE(review) in the header */
-               : "m" (image),                                  /* %0 */
-                 "m" (ximage),                                 /* %1  (incremented by the asm) */
-                 "m" (err),                                    /* %2 */
-                 "m" (nerr),                                   /* %3 */
-                 "m" (ctable),                                 /* %4 */
-                 "m" (dr),                                     /* %5 */
-                 "m" (dg),                                     /* %6 */
-                 "m" (db),                                     /* %7 */
-                 "m" (pixels),                                 /* %8 */
-                 "m" (cpc),                                    /* %9 */
-                 "m" (width),                                  /* %10 */
-                 "m" (height),                                 /* %11 (decremented by the asm) */
-                 "m" (bytesPerPixel),                          /* %12 */
-                 "m" (line_offset),                            /* %13 */
-                 "m" (x),                                      /* %14 (written by the asm) */
-                 "m" (cpcpc),                                  /* %15 (written by the asm) */
-                 "m" (rr),                                     /* %16 (written by the asm) */
-                 "m" (gg),                                     /* %17 (written by the asm) */
-                 "m" (bb),                                     /* %18 (written by the asm) */
-                 "m" (tmp_err),                                /* %19 (written by the asm) */
-                 "m" (tmp_nerr),                               /* %20 (unused by live instructions) */
-                 "m" (ndr),                                    /* %21 (unused) */
-                 "m" (ndg),                                    /* %22 (unused) */
-                 "m" (ndb)                                     /* %23 (unused) */
-       );
-}
-
-#endif /* ASM_X86 */
-- 
1.7.2.2.119.gf9c33



-- 
To unsubscribe, send mail to [email protected].

Reply via email to