Hi,

As you all might know, 2D games tend to be slow on Wine. For a lot of games the 
main bottleneck is depth conversion, which happens whenever the color depth 
requested by the game and the X desktop color depth are not the same.

As a way to speed up 2D rendering, Lionel assisted me with hacking Wine's ddraw 
to let it use parts of the Direct3D backend. The final rendering is then done 
using OpenGL, and the end result is that the video card does the color 
conversion for us. The patch greatly improves the performance of 2D games which 
don't use GetDC/ReleaseDC a lot.

While the patch fixes the conversion bottleneck for various games, it doesn't 
handle 8-bit paletted surfaces, which are used by games like StarCraft, as 
OpenGL doesn't support these by default. The second patch, which I attached as 
well, adds support for this. On cards that support the OpenGL paletted texture 
extension (at least all NVIDIA cards from the GeForce 1 to the FX), this 
extension is used. It makes StarCraft very fast, at least on my Athlon XP2000 
system with a GeForce FX, where the game was slow before. As not all cards 
support paletted textures, I emulated this using a simple fragment shader (a 1D 
texture containing the palette is used as a lookup table).

The attached patches are still experimental and likely contain bugs, so please 
test them. When the patches are applied, set 
'HKCU\Software\Wine\DirectDraw\UseDDrawOverD3D' to Y; otherwise they won't do 
anything :)

Further note that lots of games like to use multiple threads for graphics. 
With the patch, games use 'd3d' (or actually OpenGL), which adds more 
multithreaded d3d games :) Games like Command & Conquer, Red Alert, Total 
Annihilation and lots of others became multithreaded. (They crash quite 
quickly due to some critical section in x11drv.)

Have fun with the patches, everyone, and please report any issues that appear 
so that I can fix the patches and submit them to wine-patches,

Roderick Colenbrander

diff -u /var/wine/dlls/ddraw/ddraw_main.c ./ddraw_main.c
--- /var/wine/dlls/ddraw/ddraw_main.c	2005-11-13 21:18:42.000000000 -0500
+++ ./ddraw_main.c	2005-12-04 20:13:14.000000000 -0500
@@ -685,10 +685,14 @@
 	return E_POINTER; /* unchecked */
     }
 
-    if (pDDSD->ddsCaps.dwCaps & DDSCAPS_PRIMARYSURFACE)
-    {
+    if (pDDSD->ddsCaps.dwCaps & DDSCAPS_PRIMARYSURFACE) {
 	/* create primary surface & backbuffers */
 	hr = create_primary(This, pDDSD, ppSurf, pUnkOuter);
+	
+	if(opengl_initialized && use_ddraw_over_d3d) {
+	    IDirect3DDeviceImpl *dummy;
+	    d3ddevice_create(&dummy, This, *ppSurf, 1, D3D_BLIT);
+	}
     }
     else if (pDDSD->ddsCaps.dwCaps & DDSCAPS_BACKBUFFER)
     {
diff -u /var/wine/dlls/ddraw/ddraw_private.h ./ddraw_private.h
--- /var/wine/dlls/ddraw/ddraw_private.h	2005-10-10 15:53:37.000000000 -0400
+++ ./ddraw_private.h	2005-11-21 16:59:06.000000000 -0500
@@ -58,6 +58,9 @@
 
 #define MAKE_FOURCC(a,b,c,d) ((a << 0) | (b << 8) | (c << 16) | (d << 24))
 
+/* Settings */
+extern BOOL use_ddraw_over_d3d;
+
 /*****************************************************************************
  * IDirectDraw implementation structure
  */
diff -u /var/wine/dlls/ddraw/device_opengl.c ./device_opengl.c
--- /var/wine/dlls/ddraw/device_opengl.c	2005-11-13 21:18:42.000000000 -0500
+++ ./device_opengl.c	2005-12-04 20:02:40.000000000 -0500
@@ -256,8 +256,7 @@
     if (gl_d3d_dev->state[WINE_GL_BUFFER_BACK] == SURFACE_MEMORY_DIRTY) {
         d3d_dev->flush_to_framebuffer(d3d_dev, &(gl_d3d_dev->lock_rect[WINE_GL_BUFFER_BACK]), gl_d3d_dev->lock_surf[WINE_GL_BUFFER_BACK]);
     }
-    gl_d3d_dev->state[WINE_GL_BUFFER_BACK] = SURFACE_GL;
-    gl_d3d_dev->state[WINE_GL_BUFFER_FRONT] = SURFACE_GL;
+    
     glXSwapBuffers(gl_d3d_dev->display, (Drawable)drawable);
     LEAVE_GL();
     
@@ -3163,7 +3162,7 @@
 		opt_bitmap = d3ddevice_set_state_for_flush(This->d3ddevice, (LPCRECT) &rect, FALSE, &initial);
 		
 		if (upload_surface_to_tex_memory_init(This, 0, &gl_d3d_dev->current_internal_format,
-						      initial, FALSE, UNLOCK_TEX_SIZE, UNLOCK_TEX_SIZE) != DD_OK) {
+						      initial, FALSE, UNLOCK_TEX_SIZE, UNLOCK_TEX_SIZE, gl_d3d_dev->d3d_mode) != DD_OK) {
 		    ERR(" unsupported pixel format at direct buffer to buffer copy.\n");
 		    LEAVE_GL();
 		    return DDERR_INVALIDPARAMS;
@@ -3271,7 +3270,7 @@
 		opt_bitmap = d3ddevice_set_state_for_flush(This->d3ddevice, (LPCRECT) &rect, ((dwFlags & DDBLT_KEYSRC) != 0), &initial);
 		
 		if (upload_surface_to_tex_memory_init(src_impl, 0, &gl_d3d_dev->current_internal_format,
-						      initial, ((dwFlags & DDBLT_KEYSRC) != 0), UNLOCK_TEX_SIZE, UNLOCK_TEX_SIZE) != DD_OK) {
+						      initial, ((dwFlags & DDBLT_KEYSRC) != 0), UNLOCK_TEX_SIZE, UNLOCK_TEX_SIZE, gl_d3d_dev->d3d_mode) != DD_OK) {
 		    ERR(" unsupported pixel format at memory to buffer Blt override.\n");
 		    LEAVE_GL();
 		    return DDERR_INVALIDPARAMS;
@@ -3393,7 +3392,7 @@
     
     if (upload_surface_to_tex_memory_init(src_impl, 0, &gl_d3d_dev->current_internal_format,
 					  initial, (trans & DDBLTFAST_SRCCOLORKEY) != 0,
-					  UNLOCK_TEX_SIZE, UNLOCK_TEX_SIZE) != DD_OK) {
+					  UNLOCK_TEX_SIZE, UNLOCK_TEX_SIZE, gl_d3d_dev->d3d_mode) != DD_OK) {
 	ERR(" unsupported pixel format at memory to buffer Blt override.\n");
 	LEAVE_GL();
 	return DDERR_INVALIDPARAMS;
@@ -3730,7 +3729,11 @@
 		      gl_d3d_dev->lock_rect[buffer_type].right, gl_d3d_dev->lock_rect[buffer_type].bottom);
 		d3d_dev->flush_to_framebuffer(d3d_dev, &(gl_d3d_dev->lock_rect[buffer_type]), gl_d3d_dev->lock_surf[buffer_type]);
 	    }
-	    gl_d3d_dev->state[buffer_type] = SURFACE_GL;
+	    if(gl_d3d_dev->d3d_mode == D3D_FULL)
+		gl_d3d_dev->state[buffer_type] = SURFACE_GL;
+	    else
+		gl_d3d_dev->state[buffer_type] = SURFACE_MEMORY_DIRTY;
+	    
 	    gl_d3d_dev->lock_rect[buffer_type] = *pRect;
 	}
 	/* In the other case, do not upgrade the locking rectangle as it's no need... */
@@ -3878,11 +3881,11 @@
     opt_bitmap = d3ddevice_set_state_for_flush(d3d_dev, pRect, FALSE, &initial);
     
     if (upload_surface_to_tex_memory_init(surf, 0, &gl_d3d_dev->current_internal_format,
-					  initial, FALSE, UNLOCK_TEX_SIZE, UNLOCK_TEX_SIZE) != DD_OK) {
+					  initial, FALSE, UNLOCK_TEX_SIZE, UNLOCK_TEX_SIZE, gl_d3d_dev->d3d_mode) != DD_OK) {
         ERR(" unsupported pixel format at frame buffer flush.\n");
 	return;
     }
-	
+
     for (y = pRect->top; y < pRect->bottom; y += UNLOCK_TEX_SIZE) {
 	RECT flush_rect;
 	
@@ -3933,7 +3936,7 @@
     WINE_GL_BUFFER_TYPE buffer_type;
     IDirect3DDeviceImpl *d3d_dev = This->d3ddevice;
     IDirect3DDeviceGLImpl* gl_d3d_dev = (IDirect3DDeviceGLImpl*) d3d_dev;
-  
+
     if ((This->surface_desc.ddsCaps.dwCaps & (DDSCAPS_FRONTBUFFER|DDSCAPS_PRIMARYSURFACE)) != 0) {
         buffer_type = WINE_GL_BUFFER_FRONT;
     } else if ((This->surface_desc.ddsCaps.dwCaps & (DDSCAPS_BACKBUFFER)) == (DDSCAPS_BACKBUFFER)) {
@@ -3992,7 +3995,7 @@
 }     
 
 HRESULT
-d3ddevice_create(IDirect3DDeviceImpl **obj, IDirectDrawImpl *d3d, IDirectDrawSurfaceImpl *surface, int version)
+d3ddevice_create(IDirect3DDeviceImpl **obj, IDirectDrawImpl *d3d, IDirectDrawSurfaceImpl *surface, int version, D3D_MODE d3d_mode)
 {
     IDirect3DDeviceImpl *object;
     IDirect3DDeviceGLImpl *gl_object;
@@ -4029,6 +4032,7 @@
     device_context = GetDC(surface->ddraw_owner->window);
     gl_object->display = get_display(device_context);
     gl_object->drawable = get_drawable(device_context);
+    gl_object->d3d_mode = d3d_mode;
     ReleaseDC(surface->ddraw_owner->window,device_context);
 
     ENTER_GL();
@@ -4057,7 +4061,8 @@
     
     /* Look for the front buffer and override its surface's Flip method (if in double buffering) */
     for (surf = surface; surf != NULL; surf = surf->surface_owner) {
-        if ((surf->surface_desc.ddsCaps.dwCaps&(DDSCAPS_FLIP|DDSCAPS_FRONTBUFFER)) == (DDSCAPS_FLIP|DDSCAPS_FRONTBUFFER)) {
+        if ((surf->surface_desc.ddsCaps.dwCaps&(DDSCAPS_FLIP|DDSCAPS_FRONTBUFFER)) == (DDSCAPS_FLIP|DDSCAPS_FRONTBUFFER)
+	    && (gl_object->d3d_mode == D3D_FULL)) {
             surf->aux_ctx  = (LPVOID) object;
             surf->aux_data = (LPVOID) gl_object->drawable;
             surf->aux_flip = opengl_flip;
@@ -4076,16 +4081,24 @@
 	for (surf2 = surf; surf2->prev_attached != NULL; surf2 = surf2->prev_attached) ;
 	for (; surf2 != NULL; surf2 = surf2->next_attached) {
 	    TRACE(" checking surface %p :", surf2);
-	    if (((surf2->surface_desc.ddsCaps.dwCaps & (DDSCAPS_3DDEVICE)) == (DDSCAPS_3DDEVICE)) &&
+	    if ((((surf2->surface_desc.ddsCaps.dwCaps & (DDSCAPS_3DDEVICE)) == (DDSCAPS_3DDEVICE)) ||
+		((surf2->surface_desc.ddsCaps.dwCaps & (DDSCAPS_BACKBUFFER)) == (DDSCAPS_BACKBUFFER)) ||
+		((surf2->surface_desc.ddsCaps.dwCaps & (DDSCAPS_FRONTBUFFER)) == (DDSCAPS_FRONTBUFFER))) &&
 		((surf2->surface_desc.ddsCaps.dwCaps & (DDSCAPS_ZBUFFER)) != (DDSCAPS_ZBUFFER))) {
+
 	        /* Override the Lock / Unlock function for all these surfaces */
 	        surf2->lock_update_prev = surf2->lock_update;
 	        surf2->lock_update = d3ddevice_lock_update;
 		surf2->unlock_update_prev = surf2->unlock_update;
 		surf2->unlock_update = d3ddevice_unlock_update;
-		/* And install also the blt / bltfast overrides */
-		surf2->aux_blt = d3ddevice_blt;
-		surf2->aux_bltfast = d3ddevice_bltfast;
+		
+		/* And install also the blt / bltfast overrides but only when the d3d backend
+		is used for 3d purposes. */
+		if(gl_object->d3d_mode == D3D_FULL)
+		{
+		    surf2->aux_blt = d3ddevice_blt;
+		    surf2->aux_bltfast = d3ddevice_bltfast;
+		}
 		
 		TRACE(" overriding direct surface access.\n");
 	    } else {
@@ -4161,8 +4174,13 @@
     /* glDisable(GL_DEPTH_TEST); Need here to check for the presence of a ZBuffer and to reenable it when the ZBuffer is attached */
     LEAVE_GL();
 
-    gl_object->state[WINE_GL_BUFFER_BACK] = SURFACE_GL;
-    gl_object->state[WINE_GL_BUFFER_FRONT] = SURFACE_GL;
+    if(gl_object->d3d_mode == D3D_FULL) {
+	gl_object->state[WINE_GL_BUFFER_BACK] = SURFACE_GL;
+	gl_object->state[WINE_GL_BUFFER_FRONT] = SURFACE_GL;
+    } else {
+	gl_object->state[WINE_GL_BUFFER_BACK] = SURFACE_MEMORY_DIRTY;
+	gl_object->state[WINE_GL_BUFFER_FRONT] = SURFACE_MEMORY_DIRTY;
+    }
     
     /* fill_device_capabilities(d3d->ddraw); */    
     
@@ -4176,7 +4194,8 @@
     TRACE(" creating implementation at %p.\n", *obj);
 
     /* And finally warn D3D that this device is now present */
-    object->d3d->d3d_added_device(object->d3d, object);
+    if(gl_object->d3d_mode == D3D_FULL)
+	object->d3d->d3d_added_device(object->d3d, object);
 
     InitDefaultStateBlock(&object->state_block, object->version);
     /* Apply default render state and texture stage state values */
diff -u /var/wine/dlls/ddraw/direct3d_opengl.c ./direct3d_opengl.c
--- /var/wine/dlls/ddraw/direct3d_opengl.c	2005-11-02 15:18:07.000000000 -0500
+++ ./direct3d_opengl.c	2005-11-20 06:21:42.000000000 -0500
@@ -151,7 +151,7 @@
     IDirect3DDeviceImpl *lpd3ddev;
     HRESULT ret_value;
 
-    ret_value = d3ddevice_create(&lpd3ddev, This, lpDDS, version);
+    ret_value = d3ddevice_create(&lpd3ddev, This, lpDDS, version, D3D_FULL);
     if (FAILED(ret_value)) return ret_value;
     
     if ((iid == NULL) ||
diff -u /var/wine/dlls/ddraw/main.c ./main.c
--- /var/wine/dlls/ddraw/main.c	2005-08-08 13:35:29.000000000 -0400
+++ ./main.c	2005-11-22 12:49:15.000000000 -0500
@@ -39,6 +39,7 @@
 #include "winnls.h"
 #include "winerror.h"
 #include "wingdi.h"
+#include "winreg.h"
 
 #include "ddraw.h"
 #include "d3d.h"
@@ -70,6 +71,22 @@
 } DirectDrawEnumerateProcData;
 
 BOOL opengl_initialized = 0;
+BOOL use_ddraw_over_d3d = 0;
+
+#define IS_OPTION_TRUE(ch) \
+    ((ch) == 'y' || (ch) == 'Y' || (ch) == 't' || (ch) == 'T' || (ch) == '1')
+
+/*
+ * Get a config key from either the app-specific or the default config
+ */
+
+inline static DWORD get_config_key( HKEY defkey, HKEY appkey, const char *name,
+                                    char *buffer, DWORD size )
+{
+    if (appkey && !RegQueryValueExA( appkey, name, 0, NULL, (LPBYTE)buffer, &size )) return 0;
+    if (defkey && !RegQueryValueExA( defkey, name, 0, NULL, (LPBYTE)buffer, &size )) return 0;
+    return ERROR_FILE_NOT_FOUND;
+}
 
 #ifdef HAVE_OPENGL
 
@@ -161,6 +178,41 @@
     return FALSE;
 }
 
+static void setup_ddraw_options()
+{
+    char buffer[MAX_PATH+16];
+    HKEY hkey, appkey = 0;
+    DWORD len;
+
+    buffer[MAX_PATH]='\0';
+
+    /* @@ Wine registry key: HKCU\Software\Wine\DirectDraw */
+    if (RegOpenKeyA( HKEY_CURRENT_USER, "Software\\Wine\\DirectDraw", &hkey )) hkey = 0;
+
+    len = GetModuleFileNameA( 0, buffer, MAX_PATH );
+    if (len && len < MAX_PATH)
+    {
+        HKEY tmpkey;
+        /* @@ Wine registry key: HKCU\Software\Wine\AppDefaults\app.exe\DirectDraw */
+        if (!RegOpenKeyA( HKEY_CURRENT_USER, "Software\\Wine\\AppDefaults", &tmpkey ))
+        {
+            char *p, *appname = buffer;
+            if ((p = strrchr( appname, '/' ))) appname = p + 1;
+            if ((p = strrchr( appname, '\\' ))) appname = p + 1;
+            strcat( appname, "\\DirectDraw" );
+            TRACE("appname = [%s]\n", appname);
+            if (RegOpenKeyA( tmpkey, appname, &appkey )) appkey = 0;
+            RegCloseKey( tmpkey );
+        }
+    }
+
+    /* get options */
+#ifdef HAVE_OPENGL
+    if (!get_config_key( hkey, appkey, "UseDDrawOverD3D", buffer, sizeof(buffer) ))
+            use_ddraw_over_d3d = IS_OPTION_TRUE( buffer[0] );
+#endif
+}
+
 /*******************************************************************************
  * DirectDrawEnumerateExA (DDRAW.@)
  *
@@ -665,6 +717,7 @@
         opengl_initialized = DDRAW_bind_to_opengl();
 #endif /* HAVE_OPENGL */
         s3tc_initialized = DDRAW_bind_to_s3tc();
+        setup_ddraw_options();
     }
 
     if (DDRAW_num_drivers > 0)
diff -u /var/wine/dlls/ddraw/opengl_private.h ./opengl_private.h
--- /var/wine/dlls/ddraw/opengl_private.h	2005-11-02 15:18:07.000000000 -0500
+++ ./opengl_private.h	2005-12-04 19:57:26.000000000 -0500
@@ -45,6 +45,11 @@
     SURFACE_MEMORY_DIRTY
 } SURFACE_STATE;
 
+typedef enum {
+    D3D_BLIT,
+    D3D_FULL,
+} D3D_MODE;
+
 /* This structure is used for the 'd3d_private' field of the IDirectDraw structure */
 typedef struct IDirect3DGLImpl
 {
@@ -105,6 +110,9 @@
     
     GLXContext gl_context;
 
+    /* This option tells if we are using the D3D backend for 2d or 3d purposes */
+    D3D_MODE d3d_mode;
+    
     /* This stores the textures which are actually bound to the GL context */
     IDirectDrawSurfaceImpl *current_bound_texture[MAX_TEXTURES];
 
@@ -183,7 +191,7 @@
 extern HRESULT d3dmaterial_create(IDirect3DMaterialImpl **obj, IDirectDrawImpl *d3d);
 extern HRESULT d3dviewport_create(IDirect3DViewportImpl **obj, IDirectDrawImpl *d3d);
 extern HRESULT d3dvertexbuffer_create(IDirect3DVertexBufferImpl **obj, IDirectDrawImpl *d3d, LPD3DVERTEXBUFFERDESC lpD3DVertBufDesc, DWORD dwFlags);
-extern HRESULT d3ddevice_create(IDirect3DDeviceImpl **obj, IDirectDrawImpl *d3d, IDirectDrawSurfaceImpl *surface, int version);
+extern HRESULT d3ddevice_create(IDirect3DDeviceImpl **obj, IDirectDrawImpl *d3d, IDirectDrawSurfaceImpl *surface, int version, D3D_MODE d3d_mode);
 
 /* Used for Direct3D to request the device to enumerate itself */
 extern HRESULT d3ddevice_enumerate(LPD3DENUMDEVICESCALLBACK cb, LPVOID context, DWORD version) ;
@@ -210,7 +218,7 @@
 
 /* Memory to texture conversion code. Split in three functions to do some optimizations. */
 extern HRESULT upload_surface_to_tex_memory_init(IDirectDrawSurfaceImpl *surface, GLuint level, GLenum *prev_internal_format,
-						 BOOLEAN need_to_alloc, BOOLEAN need_alpha_ck, DWORD tex_width, DWORD tex_height);
+						 BOOLEAN need_to_alloc, BOOLEAN need_alpha_ck, DWORD tex_width, DWORD tex_height, D3D_MODE d3d_mode);
 extern HRESULT upload_surface_to_tex_memory(RECT *rect, DWORD xoffset, DWORD yoffset, void **temp_buffer);
 extern HRESULT upload_surface_to_tex_memory_release(void);
 
diff -u /var/wine/dlls/ddraw/opengl_utils.c ./opengl_utils.c
--- /var/wine/dlls/ddraw/opengl_utils.c	2005-11-13 21:18:42.000000000 -0500
+++ ./opengl_utils.c	2005-12-04 19:57:09.000000000 -0500
@@ -642,7 +642,8 @@
 static int current_storage_width;
 
 HRESULT upload_surface_to_tex_memory_init(IDirectDrawSurfaceImpl *surf_ptr, GLuint level, GLenum *current_internal_format,
-					  BOOLEAN need_to_alloc, BOOLEAN need_alpha_ck, DWORD tex_width, DWORD tex_height)
+					  BOOLEAN need_to_alloc, BOOLEAN need_alpha_ck, DWORD tex_width, DWORD tex_height, 
+					  D3D_MODE d3d_mode)
 {
     const DDPIXELFORMAT * const src_pf = &(surf_ptr->surface_desc.u4.ddpfPixelFormat);
     BOOL error = FALSE;
diff -u /var/wine/dlls/ddraw/surface_main.c ./surface_main.c
--- /var/wine/dlls/ddraw/surface_main.c	2005-11-20 23:39:36.000000000 -0500
+++ ./surface_main.c	2005-11-21 21:22:31.000000000 -0500
@@ -193,7 +193,7 @@
         IDirect3DDeviceImpl *d3ddevimpl;
 	HRESULT ret_value;
 
-	ret_value = d3ddevice_create(&d3ddevimpl, This->ddraw_owner, This, 1);
+	ret_value = d3ddevice_create(&d3ddevimpl, This->ddraw_owner, This, 1, D3D_FULL);
 	if (FAILED(ret_value)) return ret_value;
 
 	*ppObj = ICOM_INTERFACE(d3ddevimpl, IDirect3DDevice);
diff -u /var/wine/dlls/ddraw/texture.c ./texture.c
--- /var/wine/dlls/ddraw/texture.c	2005-10-12 17:05:40.000000000 -0400
+++ ./texture.c	2005-11-20 23:34:02.000000000 -0500
@@ -302,7 +302,7 @@
 	    }
 	    
 	    if (upload_surface_to_tex_memory_init(surf_ptr, surf_ptr->mipmap_level, &(gl_surf_ptr->current_internal_format),
-						  gl_surf_ptr->initial_upload_done == FALSE, TRUE, 0, 0) == DD_OK) {
+						  gl_surf_ptr->initial_upload_done == FALSE, TRUE, 0, 0, D3D_FULL) == DD_OK) {
 	        upload_surface_to_tex_memory(NULL, 0, 0, &(gl_surf_ptr->surface_ptr));
 		upload_surface_to_tex_memory_release();
 		gl_surf_ptr->dirty_flag = SURFACE_MEMORY;
@@ -428,7 +428,7 @@
 		  (width == surf_ptr->surface_desc.dwWidth) && (height == surf_ptr->surface_desc.dwHeight))) {
 		/* If not 'full size' and the surface is dirty, first flush it to GL before doing the copy. */
 	        if (upload_surface_to_tex_memory_init(surf_ptr, surf_ptr->mipmap_level, &(gl_surf_ptr->current_internal_format),
-						      gl_surf_ptr->initial_upload_done == FALSE, TRUE, 0, 0) == DD_OK) {
+						      gl_surf_ptr->initial_upload_done == FALSE, TRUE, 0, 0, D3D_FULL) == DD_OK) {
 		    upload_surface_to_tex_memory(NULL, 0, 0, &(gl_surf_ptr->surface_ptr));
 		    upload_surface_to_tex_memory_release();
 		    gl_surf_ptr->dirty_flag = SURFACE_MEMORY;
@@ -444,7 +444,7 @@
 	    /* This is a hack and would need some clean-up :-) */
 	    if (gl_surf_ptr->initial_upload_done == FALSE) {
 		if (upload_surface_to_tex_memory_init(surf_ptr, surf_ptr->mipmap_level, &(gl_surf_ptr->current_internal_format),
-						      TRUE, TRUE, 0, 0) == DD_OK) {
+						      TRUE, TRUE, 0, 0, D3D_FULL) == DD_OK) {
 		    upload_surface_to_tex_memory(NULL, 0, 0, &(gl_surf_ptr->surface_ptr));
 		    upload_surface_to_tex_memory_release();
 		    gl_surf_ptr->dirty_flag = SURFACE_MEMORY;
diff -u ./ddraw_main.c ../ddraw/ddraw_main.c
--- ./ddraw_main.c	2005-12-04 20:13:14.000000000 -0500
+++ ../ddraw/ddraw_main.c	2005-11-21 17:00:45.000000000 -0500
@@ -689,7 +689,7 @@
 	/* create primary surface & backbuffers */
 	hr = create_primary(This, pDDSD, ppSurf, pUnkOuter);
 	
-	if(opengl_initialized && use_ddraw_over_d3d) {
+	if(use_ddraw_over_d3d) {
 	    IDirect3DDeviceImpl *dummy;
 	    d3ddevice_create(&dummy, This, *ppSurf, 1, D3D_BLIT);
 	}
diff -u ./gl_api.h ../ddraw/gl_api.h
--- ./gl_api.h	2005-12-04 19:57:54.000000000 -0500
+++ ../ddraw/gl_api.h	2005-12-04 20:43:44.000000000 -0500
@@ -107,6 +107,7 @@
 GL_API_FUNCTION(glTexEnvf)
 GL_API_FUNCTION(glTexEnvfv)
 GL_API_FUNCTION(glTexEnvi)
+GL_API_FUNCTION(glTexImage1D)
 GL_API_FUNCTION(glTexImage2D)
 GL_API_FUNCTION(glTexParameteri)
 GL_API_FUNCTION(glTexParameterfv)
diff -u ./gl_private.h ../ddraw/gl_private.h
--- ./gl_private.h	2005-12-04 19:56:27.000000000 -0500
+++ ../ddraw/gl_private.h	2005-11-22 12:47:25.000000000 -0500
@@ -67,6 +67,9 @@
 #define GL_TEXTURE7_WINE                        0x84C7
 #define GL_MAX_TEXTURE_UNITS_WINE               0x84E2
 
+#define GL_FRAGMENT_PROGRAM_ARB_WINE            0x8804
+#define GL_PROGRAM_FORMAT_ASCII_ARB_WINE        0x8875
+
 #ifndef GLPRIVATE_NO_REDEFINE
 
 #define glAlphaFunc pglAlphaFunc
@@ -150,6 +153,7 @@
 #define glTexEnvf pglTexEnvf
 #define glTexEnvfv pglTexEnvfv
 #define glTexEnvi pglTexEnvi
+#define glTexImage1D pglTexImage1D
 #define glTexImage2D pglTexImage2D
 #define glTexParameteri pglTexParameteri
 #define glTexParameterfv pglTexParameterfv
diff -u ./opengl_private.h ../ddraw/opengl_private.h
--- ./opengl_private.h	2005-12-04 19:57:26.000000000 -0500
+++ ../ddraw/opengl_private.h	2005-11-20 10:28:31.000000000 -0500
@@ -180,6 +180,14 @@
                                  GLsizei height, GLint border, GLsizei imageSize, const GLvoid *data);
     void (*glCompressedTexSubImage2D)(GLenum target, GLint level, GLint xoffset, GLint yoffset,
                                     GLsizei width, GLsizei height, GLsizei imageSize, const GLvoid *data);
+    /* Paletted texture support */
+    BOOLEAN paletted_texture;
+    void (*glColorTable)(GLenum, GLenum, GLsizei, GLenum, GLenum, const GLvoid *);
+    /* Fragment program support */ 
+    BOOLEAN fragment_program;
+    void (*glBindProgramARB)(GLenum, GLuint);
+    void (*glGenProgramsARB)(GLsizei, GLuint *);
+    void (*glProgramStringARB)(GLenum, GLenum, GLsizei, const GLvoid *);
 } GL_EXTENSIONS_LIST; 
 extern GL_EXTENSIONS_LIST GL_extensions;
 
diff -u ./opengl_utils.c ../ddraw/opengl_utils.c
--- ./opengl_utils.c	2005-12-04 19:57:09.000000000 -0500
+++ ../ddraw/opengl_utils.c	2005-12-04 19:46:54.000000000 -0500
@@ -640,6 +640,15 @@
 static DWORD current_tex_height;
 static GLuint current_alignement_constraints;
 static int current_storage_width;
+static int use_fragment_program;
+
+static unsigned int shader_id;
+const char *fragment_palette_conversion =
+    "!!ARBfp1.0\n"
+    "TEMP index;\n"
+    "TEX index.x, fragment.texcoord[0], texture[0], 2D;\n" /* store the red-component of the current pixel */
+    "TEX result.color, index, texture[1], 1D;\n" /* use the red-component as a index in the palette to get the final color */
+    "END";
 
 HRESULT upload_surface_to_tex_memory_init(IDirectDrawSurfaceImpl *surf_ptr, GLuint level, GLenum *current_internal_format,
 					  BOOLEAN need_to_alloc, BOOLEAN need_alpha_ck, DWORD tex_width, DWORD tex_height, 
@@ -705,10 +714,117 @@
 	/* ****************
 	   Paletted Texture
 	   **************** */
-	current_format = GL_RGBA;
-	internal_format = GL_RGBA;
-	current_pixel_format = GL_UNSIGNED_BYTE;
-	convert_type = CONVERT_PALETTED;
+
+	/* First check if the hardware can do the conversion for us using the paletted texture extension.
+	In case we are in D3D_BLIT mode we can also use a fragment program. If neither of both is supported
+	we should do the conversion in software but that is SLOW. */
+	if(GL_extensions.paletted_texture)
+	{	   
+	    IDirectDrawPaletteImpl* pal = current_surface->palette;
+	    int i;
+	    BYTE table[256][4];
+
+	    if (pal == NULL) {
+		/* Upload a black texture. The real one will be uploaded on palette change */
+		WARN("Palettized texture Loading with a NULL palette !\n");
+		memset(table, 0, 256 * 4);
+	    } else {
+		for (i = 0; i < 256; i++) {
+		    table[i][0] = pal->palents[i].peRed;
+		    table[i][1] = pal->palents[i].peGreen;
+		    table[i][2] = pal->palents[i].peBlue;
+		    if ((surf_ptr->surface_desc.dwFlags & DDSD_CKSRCBLT) && 
+			(i >= surf_ptr->surface_desc.ddckCKSrcBlt.dwColorSpaceLowValue) &&
+			(i <= surf_ptr->surface_desc.ddckCKSrcBlt.dwColorSpaceHighValue))
+			/* We should maybe here put a more 'neutral' color than the standard bright purple
+			   one often used by application to prevent the nice purple borders when bi-linear
+			   filtering is on */
+			table[i][3] = 0x00;
+		    else
+			table[i][3] = 0xFF;
+		}
+	    }
+
+	    current_format = GL_COLOR_INDEX;
+	    internal_format = GL_COLOR_INDEX8_EXT;
+	    current_pixel_format = GL_UNSIGNED_BYTE;
+	    convert_type = NO_CONVERSION;
+
+	    GL_extensions.glColorTable(GL_TEXTURE_2D,GL_RGBA,256,GL_RGBA,GL_UNSIGNED_BYTE, table);
+	}  else if(GL_extensions.fragment_program && (d3d_mode == D3D_BLIT)) {	   
+	    IDirectDrawPaletteImpl* pal = current_surface->palette;
+	    int i;
+	    BYTE table[512][4];
+	    static unsigned int have_fragment_program = 0;
+
+	    if (pal == NULL) {
+		/* Upload a black texture. The real one will be uploaded on palette change */
+		WARN("Palettized texture Loading with a NULL palette !\n");
+		memset(table, 0, 512 * 4);
+	    } else {
+		for (i = 0; i < 512; i+=2) {
+		    table[i][0] = pal->palents[i/2].peRed;
+		    table[i][1] = pal->palents[i/2].peGreen;
+		    table[i][2] = pal->palents[i/2].peBlue;
+		    table[i][3] = 0xFF;
+
+		    table[i+1][0] = table[i][0];
+		    table[i+1][1] = table[i][1];
+		    table[i+1][2] = table[i][2];
+		    table[i+1][3] = table[i][3];
+		}
+	    }
+		
+	    /* Create the fragment program if we don't have it */
+	    if(!have_fragment_program)
+	    {
+	        glEnable(GL_FRAGMENT_PROGRAM_ARB_WINE);
+	        GL_extensions.glGenProgramsARB(1, &shader_id);
+	        GL_extensions.glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB_WINE, shader_id);
+	        GL_extensions.glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB_WINE, GL_PROGRAM_FORMAT_ASCII_ARB_WINE, strlen(fragment_palette_conversion), (const GLbyte *)fragment_palette_conversion);
+	        glDisable(GL_FRAGMENT_PROGRAM_ARB_WINE);
+	        have_fragment_program=1;
+	    }
+		
+	    /* We emulate paletted texture support using a simple fragment program.
+	    The palette consisting of 256 colors itself is uploaded using a 512x1 texture with two
+	    entries for each color. The reason for this is that in the shader texture coordinates
+	    and indexing in textures works using floats [0.0, 1.0]. Further the texture magnification/minifying
+	    function is set to GL_NEAREST so that we get discrete color levels. The entry closest to
+	    a specific index is chosen. Due to precision problems in some cases the wrong color is sampled.
+	    Uploading each color twice fixed this problem for both Mesa 6.4 and a Nvidia Geforce6 card. Note that
+	    this precision problem didn't appear on a GeforceFX card using the same drivers.
+	    
+	    Further the (8-bit) 2D texture is uploaded with current_format set to GL_RED, so that all
+	    pixel data is stored in the red channel. For the color conversion the red part of the pixel
+	    is then used as an index in the palette to find the 'real' color. */
+
+	    glEnable(GL_FRAGMENT_PROGRAM_ARB_WINE);
+	    GL_extensions.glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB_WINE, shader_id);
+	    use_fragment_program = 1;
+		
+	    /* The palette will be stored in texture unit 1. We know for
+	    sure that there are atleast two units as this is required for
+	    cards in order to support the fragment program extension. */		
+	    GL_extensions.glActiveTexture(GL_TEXTURE1_WINE);
+    	    glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+    	    glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); /* Make sure we have discrete color levels. */
+    	    glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    	    glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, 512, 1, GL_RGBA, GL_UNSIGNED_BYTE, table);
+
+	    /* Switch back to unit 0 in which the 2D texture will be stored. */
+	    GL_extensions.glActiveTexture(GL_TEXTURE0_WINE);
+
+	    current_format = GL_RED; /* Put all the color information in the red channel. */
+	    internal_format = GL_RGBA;
+	    current_pixel_format = GL_UNSIGNED_BYTE;
+	    convert_type = NO_CONVERSION;
+	} else {
+	    current_format = GL_RGBA;
+	    internal_format = GL_RGBA;
+	    current_pixel_format = GL_UNSIGNED_BYTE;
+	    convert_type = CONVERT_PALETTED;
+	}
     } else if (src_pf->dwFlags & DDPF_RGB) {
 	/* ************
 	   RGB Textures
@@ -1323,5 +1439,12 @@
 {
     current_surface = NULL;
 
+    /* Disable the fragment program after the 8-bit texture is uploaded. */
+    if(use_fragment_program)
+    {
+	glDisable(GL_FRAGMENT_PROGRAM_ARB_WINE);
+	use_fragment_program = 0;
+    }
+
     return DD_OK;
 }


Reply via email to