Hi, here's a new patch for review. I won't send it until Monday because I want to do some performance testing (see below), and because the ddraw patch is queued below it and still needs more testing.
The changes are essentially some bug fixing (one NULL pointer exception), and I dropped the optimization for full buffer maps for now. Oh, and I added some helper functions, but beyond that it's the same.

Am 16.12.2009 um 14:39 schrieb Henri Verbeet:
> 2009/12/16 Stefan Dösinger <[email protected]>:
>> Actually, since we're not going to Unmap-flush and PreLoad() the buffer(its
>> either-or), I can use the same data structure for the stack and list(or
>> well, dynamic array). I just have to watch out not to remove elements in any
>> place in unlock that affects DOUBLEBUFFER buffers.
>>
> Sounds fragile, but I'm willing to wait and see what the final patch looks
> like.

I'm not doing that for now; I want to test whether there's any performance difference. If I do use it, it might collide with range merges, or with catching locks of the whole buffer (and then not recording any more partial locks). I'll try to find a game where this matters and benchmark it, but I think the difference will be too small to notice.
From 85d525dde06f58f0ac216ebf4047ef6fbfc91b7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20D=C3=B6singer?= <[email protected]> Date: Thu, 17 Dec 2009 20:59:47 +0100 Subject: [PATCH 5/8] WineD3D: Track separate dirty ranges in buffers --- dlls/wined3d/buffer.c | 215 ++++++++++++++++++++++++--------------- dlls/wined3d/wined3d_private.h | 17 ++- 2 files changed, 143 insertions(+), 89 deletions(-) diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c index 8a15445..7355c2a 100644 --- a/dlls/wined3d/buffer.c +++ b/dlls/wined3d/buffer.c @@ -34,6 +34,47 @@ WINE_DEFAULT_DEBUG_CHANNEL(d3d); #define VB_MAXDECLCHANGES 100 /* After that number we stop converting */ #define VB_RESETDECLCHANGE 1000 /* Reset the changecount after that number of draws */ +static inline BOOL buffer_add_dirty_area(struct wined3d_buffer *This, UINT offset, UINT size) +{ + if (!This->buffer_object) return TRUE; + + if (This->maps_size <= This->modified_areas) + { + void *new = HeapReAlloc(GetProcessHeap(), 0, This->maps, + This->maps_size * 2 * sizeof(*This->maps)); + if (!new) + { + ERR("Out of memory\n"); + return FALSE; + } + else + { + This->maps = new; + This->maps_size *= 2; + } + } + + if(!offset && !size) + { + size = This->resource.size; + } + + This->maps[This->modified_areas].offset = offset; + This->maps[This->modified_areas].size = size; + This->modified_areas++; + return TRUE; +} + +static inline void buffer_clear_dirty_areas(struct wined3d_buffer *This) +{ + This->modified_areas = 0; +} + +static inline BOOL buffer_is_dirty(struct wined3d_buffer *This) +{ + return This->modified_areas != 0; +} + /* Context activation is done by the caller. 
*/ static void buffer_create_buffer_object(struct wined3d_buffer *This) { @@ -63,6 +104,7 @@ static void buffer_create_buffer_object(struct wined3d_buffer *This) if (!This->buffer_object || error != GL_NO_ERROR) { ERR("Failed to create a VBO with error %s (%#x)\n", debug_glerror(error), error); + LEAVE_GL(); goto fail; } @@ -75,6 +117,7 @@ static void buffer_create_buffer_object(struct wined3d_buffer *This) if (error != GL_NO_ERROR) { ERR("Failed to bind the VBO with error %s (%#x)\n", debug_glerror(error), error); + LEAVE_GL(); goto fail; } @@ -99,29 +142,29 @@ static void buffer_create_buffer_object(struct wined3d_buffer *This) */ GL_EXTCALL(glBufferDataARB(This->buffer_type_hint, This->resource.size, This->resource.allocatedMemory, gl_usage)); error = glGetError(); + LEAVE_GL(); if (error != GL_NO_ERROR) { ERR("glBufferDataARB failed with error %s (%#x)\n", debug_glerror(error), error); goto fail; } - LEAVE_GL(); - This->buffer_object_size = This->resource.size; This->buffer_object_usage = gl_usage; - This->dirty_start = 0; - This->dirty_end = This->resource.size; if(This->flags & WINED3D_BUFFER_DOUBLEBUFFER) { - This->flags |= WINED3D_BUFFER_DIRTY; + if(!buffer_add_dirty_area(This, 0, 0)) + { + ERR("buffer_add_dirty_area failed, this is not expected\n"); + goto fail; + } } else { HeapFree(GetProcessHeap(), 0, This->resource.heapMemory); This->resource.allocatedMemory = NULL; This->resource.heapMemory = NULL; - This->flags &= ~WINED3D_BUFFER_DIRTY; } return; @@ -129,9 +172,14 @@ static void buffer_create_buffer_object(struct wined3d_buffer *This) fail: /* Clean up all vbo init, but continue because we can work without a vbo :-) */ ERR("Failed to create a vertex buffer object. 
Continuing, but performance issues may occur\n"); - if (This->buffer_object) GL_EXTCALL(glDeleteBuffersARB(1, &This->buffer_object)); + if (This->buffer_object) + { + ENTER_GL(); + GL_EXTCALL(glDeleteBuffersARB(1, &This->buffer_object)); + LEAVE_GL(); + } This->buffer_object = 0; - LEAVE_GL(); + buffer_clear_dirty_areas(This); return; } @@ -620,6 +668,7 @@ static void STDMETHODCALLTYPE buffer_UnLoad(IWineD3DBuffer *iface) LEAVE_GL(); This->buffer_object = 0; This->flags |= WINED3D_BUFFER_CREATEBO; /* Recreate the buffer object next load */ + buffer_clear_dirty_areas(This); context_release(context); } @@ -637,6 +686,7 @@ static ULONG STDMETHODCALLTYPE buffer_Release(IWineD3DBuffer *iface) buffer_UnLoad(iface); resource_cleanup((IWineD3DResource *)iface); This->resource.parent_ops->wined3d_object_destroyed(This->resource.parent); + HeapFree(GetProcessHeap(), 0, This->maps); HeapFree(GetProcessHeap(), 0, This); } @@ -683,7 +733,7 @@ static void STDMETHODCALLTYPE buffer_PreLoad(IWineD3DBuffer *iface) { struct wined3d_buffer *This = (struct wined3d_buffer *)iface; IWineD3DDeviceImpl *device = This->resource.device; - UINT start = 0, end = 0, vertices; + UINT start = 0, end = 0, len = 0, vertices; struct wined3d_context *context; BOOL decl_changed = FALSE; unsigned int i, j; @@ -715,7 +765,7 @@ static void STDMETHODCALLTYPE buffer_PreLoad(IWineD3DBuffer *iface) This->flags |= WINED3D_BUFFER_HASDESC; } - if (!decl_changed && !(This->flags & WINED3D_BUFFER_HASDESC && This->flags & WINED3D_BUFFER_DIRTY)) + if (!decl_changed && !(This->flags & WINED3D_BUFFER_HASDESC && buffer_is_dirty(This))) { context_release(context); return; @@ -762,28 +812,14 @@ static void STDMETHODCALLTYPE buffer_PreLoad(IWineD3DBuffer *iface) { /* The declaration changed, reload the whole buffer */ WARN("Reloading buffer because of decl change\n"); - start = 0; - end = This->resource.size; - } - else - { - /* No decl change, but dirty data, reload the changed stuff */ - if (This->conversion_shift) + 
buffer_clear_dirty_areas(This); + if(!buffer_add_dirty_area(This, 0, 0)) { - if (This->dirty_start != 0 || This->dirty_end != 0) - { - FIXME("Implement partial buffer loading with shifted conversion\n"); - } + ERR("buffer_add_dirty_area failed, this is not expected\n"); + return; } - start = This->dirty_start; - end = This->dirty_end; } - /* Mark the buffer clean */ - This->flags &= ~WINED3D_BUFFER_DIRTY; - This->dirty_start = 0; - This->dirty_end = 0; - if(This->buffer_type_hint == GL_ELEMENT_ARRAY_BUFFER_ARB) { IWineD3DDeviceImpl_MarkStateDirty(This->resource.device, STATE_INDEXBUFFER); @@ -807,8 +843,14 @@ static void STDMETHODCALLTYPE buffer_PreLoad(IWineD3DBuffer *iface) ENTER_GL(); GL_EXTCALL(glBindBufferARB(This->buffer_type_hint, This->buffer_object)); checkGLcall("glBindBufferARB"); - GL_EXTCALL(glBufferSubDataARB(This->buffer_type_hint, start, end-start, This->resource.allocatedMemory + start)); - checkGLcall("glBufferSubDataARB"); + while(This->modified_areas) + { + This->modified_areas--; + start = This->maps[This->modified_areas].offset; + len = This->maps[This->modified_areas].size; + GL_EXTCALL(glBufferSubDataARB(This->buffer_type_hint, start, len, This->resource.allocatedMemory + start)); + checkGLcall("glBufferSubDataARB"); + } LEAVE_GL(); context_release(context); @@ -828,6 +870,15 @@ static void STDMETHODCALLTYPE buffer_PreLoad(IWineD3DBuffer *iface) TRACE("Shifted conversion\n"); data = HeapAlloc(GetProcessHeap(), 0, vertices * This->conversion_stride); + start = 0; + len = This->resource.size; + end = start + len; + + if (This->maps[0].offset || This->maps[0].size != This->resource.size) + { + FIXME("Implement partial buffer load with shifted conversion\n"); + } + for (i = start / This->stride; i < min((end / This->stride) + 1, vertices); ++i) { for (j = 0; j < This->stride; ++j) @@ -867,41 +918,50 @@ static void STDMETHODCALLTYPE buffer_PreLoad(IWineD3DBuffer *iface) else { data = HeapAlloc(GetProcessHeap(), 0, This->resource.size); - 
memcpy(data + start, This->resource.allocatedMemory + start, end - start); - for (i = start / This->stride; i < min((end / This->stride) + 1, vertices); ++i) + + while(This->modified_areas) { - for (j = 0; j < This->stride; ++j) + This->modified_areas--; + start = This->maps[This->modified_areas].offset; + len = This->maps[This->modified_areas].size; + end = start + len; + + memcpy(data + start, This->resource.allocatedMemory + start, end - start); + for (i = start / This->stride; i < min((end / This->stride) + 1, vertices); ++i) { - switch(This->conversion_map[j]) + for (j = 0; j < This->stride; ++j) { - case CONV_NONE: - /* Done already */ - j += 3; - break; - case CONV_D3DCOLOR: - fixup_d3dcolor((DWORD *) (data + i * This->stride + j)); - j += 3; - break; - - case CONV_POSITIONT: - fixup_transformed_pos((float *) (data + i * This->stride + j)); - j += 15; - break; - - case CONV_FLOAT16_2: - ERR("Did not expect FLOAT16 conversion in unshifted conversion\n"); - default: - FIXME("Unimplemented conversion %d in shifted conversion\n", This->conversion_map[j]); + switch(This->conversion_map[j]) + { + case CONV_NONE: + /* Done already */ + j += 3; + break; + case CONV_D3DCOLOR: + fixup_d3dcolor((DWORD *) (data + i * This->stride + j)); + j += 3; + break; + + case CONV_POSITIONT: + fixup_transformed_pos((float *) (data + i * This->stride + j)); + j += 15; + break; + + case CONV_FLOAT16_2: + ERR("Did not expect FLOAT16 conversion in unshifted conversion\n"); + default: + FIXME("Unimplemented conversion %d in shifted conversion\n", This->conversion_map[j]); + } } } - } - ENTER_GL(); - GL_EXTCALL(glBindBufferARB(This->buffer_type_hint, This->buffer_object)); - checkGLcall("glBindBufferARB"); - GL_EXTCALL(glBufferSubDataARB(This->buffer_type_hint, start, end - start, data + start)); - checkGLcall("glBufferSubDataARB"); - LEAVE_GL(); + ENTER_GL(); + GL_EXTCALL(glBindBufferARB(This->buffer_type_hint, This->buffer_object)); + checkGLcall("glBindBufferARB"); + 
GL_EXTCALL(glBufferSubDataARB(This->buffer_type_hint, start, len, data + start)); + checkGLcall("glBufferSubDataARB"); + LEAVE_GL(); + } } HeapFree(GetProcessHeap(), 0, data); @@ -922,27 +982,9 @@ static HRESULT STDMETHODCALLTYPE buffer_Map(IWineD3DBuffer *iface, UINT offset, TRACE("iface %p, offset %u, size %u, data %p, flags %#x\n", iface, offset, size, data, flags); - count = InterlockedIncrement(&This->lock_count); - - if (This->flags & WINED3D_BUFFER_DIRTY) - { - if (This->dirty_start > offset) This->dirty_start = offset; + if (!buffer_add_dirty_area(This, offset, size)) return E_OUTOFMEMORY; - if (size) - { - if (This->dirty_end < offset + size) This->dirty_end = offset + size; - } - else - { - This->dirty_end = This->resource.size; - } - } - else - { - This->dirty_start = offset; - if (size) This->dirty_end = offset + size; - else This->dirty_end = This->resource.size; - } + count = InterlockedIncrement(&This->lock_count); if(!(This->flags & WINED3D_BUFFER_DOUBLEBUFFER) && This->buffer_object) { @@ -964,10 +1006,6 @@ static HRESULT STDMETHODCALLTYPE buffer_Map(IWineD3DBuffer *iface, UINT offset, context_release(context); } } - else - { - This->flags |= WINED3D_BUFFER_DIRTY; - } *data = This->resource.allocatedMemory + offset; @@ -1018,6 +1056,7 @@ static HRESULT STDMETHODCALLTYPE buffer_Unmap(IWineD3DBuffer *iface) context_release(context); This->resource.allocatedMemory = NULL; + buffer_clear_dirty_areas(This); } else if (This->flags & WINED3D_BUFFER_HASDESC) { @@ -1154,5 +1193,15 @@ HRESULT buffer_init(struct wined3d_buffer *buffer, IWineD3DDeviceImpl *device, } } + buffer->maps = HeapAlloc(GetProcessHeap(), 0, sizeof(*buffer->maps)); + if (!buffer->maps) + { + ERR("Out of memory\n"); + buffer_UnLoad((IWineD3DBuffer *)buffer); + resource_cleanup((IWineD3DResource *)buffer); + return E_OUTOFMEMORY; + } + buffer->maps_size = 1; + return WINED3D_OK; } diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index e5e0ae1..a1356e9 100644 
--- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -2344,11 +2344,16 @@ enum wined3d_buffer_conversion_type CONV_FLOAT16_2, /* Also handles FLOAT16_4 */ }; +struct wined3d_map_range +{ + UINT offset; + UINT size; +}; + #define WINED3D_BUFFER_OPTIMIZED 0x01 /* Optimize has been called for the buffer */ -#define WINED3D_BUFFER_DIRTY 0x02 /* Buffer data has been modified */ -#define WINED3D_BUFFER_HASDESC 0x04 /* A vertex description has been found */ -#define WINED3D_BUFFER_CREATEBO 0x08 /* Attempt to create a buffer object next PreLoad */ -#define WINED3D_BUFFER_DOUBLEBUFFER 0x10 /* Use a vbo and local allocated memory */ +#define WINED3D_BUFFER_HASDESC 0x02 /* A vertex description has been found */ +#define WINED3D_BUFFER_CREATEBO 0x04 /* Attempt to create a buffer object next PreLoad */ +#define WINED3D_BUFFER_DOUBLEBUFFER 0x08 /* Use a vbo and local allocated memory */ struct wined3d_buffer { @@ -2364,9 +2369,9 @@ struct wined3d_buffer LONG bind_count; DWORD flags; - UINT dirty_start; - UINT dirty_end; LONG lock_count; + struct wined3d_map_range *maps; + ULONG maps_size, modified_areas; /* conversion stuff */ UINT conversion_count; -- 1.6.4.4
From c30caec1dec66b2568b06bb6e26119a05954c467 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20D=C3=B6singer?= <[email protected]> Date: Thu, 17 Dec 2009 22:44:07 +0100 Subject: [PATCH 8/8] WineD3D: Implement subrange flushing with GL_APPLE_flush_buffer_range --- dlls/wined3d/buffer.c | 24 +++++++++++++++++++++++- dlls/wined3d/wined3d_private.h | 1 + 2 files changed, 24 insertions(+), 1 deletions(-) diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c index 715c823..e225c9e 100644 --- a/dlls/wined3d/buffer.c +++ b/dlls/wined3d/buffer.c @@ -79,6 +79,7 @@ static inline BOOL buffer_is_dirty(struct wined3d_buffer *This) static void buffer_create_buffer_object(struct wined3d_buffer *This) { GLenum error, gl_usage; + const struct wined3d_gl_info *gl_info = &This->resource.device->adapter->gl_info; TRACE("Creating an OpenGL vertex buffer object for IWineD3DVertexBuffer %p Usage(%s)\n", This, debug_d3dusage(This->resource.usage)); @@ -133,6 +134,13 @@ static void buffer_create_buffer_object(struct wined3d_buffer *This) { TRACE("Gl usage = GL_DYNAMIC_DRAW_ARB\n"); gl_usage = GL_DYNAMIC_DRAW_ARB; + + if(gl_info->supported[APPLE_FLUSH_BUFFER_RANGE]) + { + GL_EXTCALL(glBufferParameteriAPPLE(This->buffer_type_hint, GL_BUFFER_FLUSHING_UNMAP_APPLE, GL_FALSE)); + checkGLcall("glBufferParameteriAPPLE(This->buffer_type_hint, GL_BUFFER_FLUSHING_UNMAP_APPLE, GL_FALSE)"); + This->flags |= WINED3D_BUFFER_FLUSH; + } } /* Reserve memory for the buffer. 
The amount of data won't change @@ -1019,6 +1027,7 @@ static HRESULT STDMETHODCALLTYPE buffer_Map(IWineD3DBuffer *iface, UINT offset, static HRESULT STDMETHODCALLTYPE buffer_Unmap(IWineD3DBuffer *iface) { struct wined3d_buffer *This = (struct wined3d_buffer *)iface; + ULONG i; TRACE("(%p)\n", This); @@ -1052,6 +1061,18 @@ static HRESULT STDMETHODCALLTYPE buffer_Unmap(IWineD3DBuffer *iface) context = context_acquire(device, NULL, CTXUSAGE_RESOURCELOAD); ENTER_GL(); GL_EXTCALL(glBindBufferARB(This->buffer_type_hint, This->buffer_object)); + + if(This->flags & WINED3D_BUFFER_FLUSH) + { + for(i = 0; i < This->modified_areas; i++) + { + GL_EXTCALL(glFlushMappedBufferRangeAPPLE(This->buffer_type_hint, + This->maps[i].offset, + This->maps[i].size)); + checkGLcall("glFlushMappedBufferRangeAPPLE"); + } + } + GL_EXTCALL(glUnmapBufferARB(This->buffer_type_hint)); LEAVE_GL(); context_release(context); @@ -1134,7 +1155,8 @@ HRESULT buffer_init(struct wined3d_buffer *buffer, IWineD3DDeviceImpl *device, TRACE("size %#x, usage %#x, format %s, memory @ %p, iface @ %p.\n", buffer->resource.size, buffer->resource.usage, debug_d3dformat(buffer->resource.format_desc->format), buffer->resource.allocatedMemory, buffer); - dynamic_buffer_ok = FALSE; /* TODO: GL_APPLE_map_buffer_range, GL_ARB_map_buffer_range */ + /* TODO: GL_ARB_map_buffer_range */ + dynamic_buffer_ok = gl_info->supported[APPLE_FLUSH_BUFFER_RANGE]; /* Observations show that drawStridedSlow is faster on dynamic VBs than converting + * drawStridedFast (half-life 2 and others). 
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index a1356e9..ec2627f 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -2354,6 +2354,7 @@ struct wined3d_map_range #define WINED3D_BUFFER_HASDESC 0x02 /* A vertex description has been found */ #define WINED3D_BUFFER_CREATEBO 0x04 /* Attempt to create a buffer object next PreLoad */ #define WINED3D_BUFFER_DOUBLEBUFFER 0x08 /* Use a vbo and local allocated memory */ +#define WINED3D_BUFFER_FLUSH 0x10 /* Manual unmap flushing */ struct wined3d_buffer { -- 1.6.4.4
